From 3eb4422b61210a2064ea6c5aa03ffc3b52a4d339 Mon Sep 17 00:00:00 2001 From: Doychin Atanasov Date: Fri, 13 Aug 2021 17:08:49 +0300 Subject: [PATCH 1/2] Do not store all objects in memory on list commands Previously, all of the object blobs were read into memory on every list command, even when the list command would have returned nothing. There are many problems with this approach: * It is extremely slow to read all files on every list. * Once the blobs in the storage exceed the memory of the machine, the process will crash with an OOM error. This MR makes it so that for most operations the actual blob object contents are not kept in memory. Instead, only a small struct (ObjectAttrs) is used. Unfortunately, due to the nature of JSON encoding, all objects are still read from the disk in full at least once. --- fakestorage/bucket_test.go | 28 +-- fakestorage/example_test.go | 16 +- fakestorage/object.go | 128 +++++++++----- fakestorage/object_test.go | 285 ++++++++++++++++++++----------- fakestorage/response.go | 16 +- fakestorage/server_test.go | 20 +-- fakestorage/upload.go | 90 +++++----- fakestorage/upload_test.go | 12 +- internal/backend/backend_test.go | 27 ++- internal/backend/fs.go | 13 +- internal/backend/memory.go | 31 +++- internal/backend/object.go | 16 +- internal/backend/storage.go | 2 +- main.go | 14 +- main_test.go | 80 +++++---- 15 files changed, 491 insertions(+), 287 deletions(-) diff --git a/fakestorage/bucket_test.go b/fakestorage/bucket_test.go index 10a711f3d4..caf65f1e77 100644 --- a/fakestorage/bucket_test.go +++ b/fakestorage/bucket_test.go @@ -17,11 +17,11 @@ import ( func TestServerClientBucketAttrs(t *testing.T) { objs := []Object{ - {BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}, - {BucketName: "some-bucket", Name: "img/hi-res/party-02.jpg"}, - {BucketName: "some-bucket", Name: "img/hi-res/party-03.jpg"}, - {BucketName: "other_bucket", Name: "static/css/website.css"}, - {BucketName: "dot.bucket", Name: 
"static/js/app.js"}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-02.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-03.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "other_bucket", Name: "static/css/website.css"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "dot.bucket", Name: "static/js/app.js"}}, } startTime := time.Now() runServersTest(t, objs, func(t *testing.T, server *Server) { @@ -86,7 +86,7 @@ func TestServerClientDeleteBucket(t *testing.T) { t.Run("it returns an error for non-empty buckets", func(t *testing.T) { const bucketName = "non-empty-bucket" - objs := []Object{{BucketName: bucketName, Name: "static/js/app.js"}} + objs := []Object{{ObjectAttrs: ObjectAttrs{BucketName: bucketName, Name: "static/js/app.js"}}} runServersTest(t, objs, func(t *testing.T, server *Server) { client := server.Client() err := client.Bucket(bucketName).Delete(context.Background()) @@ -179,11 +179,11 @@ func TestServerClientBucketAttrsNotFound(t *testing.T) { func TestServerClientListBuckets(t *testing.T) { objs := []Object{ - {BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}, - {BucketName: "some-bucket", Name: "img/hi-res/party-02.jpg"}, - {BucketName: "some-bucket", Name: "img/hi-res/party-03.jpg"}, - {BucketName: "other_bucket", Name: "static/css/website.css"}, - {BucketName: "dot.bucket", Name: "static/js/app.js"}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-02.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-03.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "other_bucket", Name: "static/css/website.css"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "dot.bucket", Name: "static/js/app.js"}}, } runServersTest(t, objs, func(t *testing.T, server 
*Server) { @@ -224,9 +224,9 @@ func TestServerClientListBuckets(t *testing.T) { func TestServerClientListObjects(t *testing.T) { objects := []Object{ - {BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}, - {BucketName: "some-bucket", Name: "img/hi-res/party-02.jpg"}, - {BucketName: "some-bucket", Name: "img/hi-res/party-03.jpg"}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-02.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-03.jpg"}}, } dir, err := ioutil.TempDir("", "fakestorage-test-root-") if err != nil { diff --git a/fakestorage/example_test.go b/fakestorage/example_test.go index 9deb960292..f67d0a2407 100644 --- a/fakestorage/example_test.go +++ b/fakestorage/example_test.go @@ -15,9 +15,11 @@ import ( func ExampleServer_Client() { server := fakestorage.NewServer([]fakestorage.Object{ { - BucketName: "some-bucket", - Name: "some/object/file.txt", - Content: []byte("inside the file"), + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "some-bucket", + Name: "some/object/file.txt", + }, + Content: []byte("inside the file"), }, }) defer server.Stop() @@ -40,9 +42,11 @@ func ExampleServer_with_host_port() { server, err := fakestorage.NewServerWithOptions(fakestorage.Options{ InitialObjects: []fakestorage.Object{ { - BucketName: "some-bucket", - Name: "some/object/file.txt", - Content: []byte("inside the file"), + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "some-bucket", + Name: "some/object/file.txt", + }, + Content: []byte("inside the file"), }, }, Host: "127.0.0.1", diff --git a/fakestorage/object.go b/fakestorage/object.go index 6432142e1d..a62593b091 100644 --- a/fakestorage/object.go +++ b/fakestorage/object.go @@ -23,13 +23,13 @@ import ( var errInvalidGeneration = errors.New("invalid generation ID") -// Object represents the object that is stored within the fake server. 
-type Object struct { +// ObjectAttrs holds the metadata of an object, without its contents. +type ObjectAttrs struct { BucketName string Name string + Size int64 ContentType string ContentEncoding string - Content []byte // Crc32c checksum of Content. calculated by server when it's upload methods are used. Crc32c string Md5Hash string @@ -43,11 +43,22 @@ type Object struct { Metadata map[string]string } +func (o *ObjectAttrs) id() string { + return o.BucketName + "/" + o.Name +} + +// Object represents the object that is stored within the fake server. +type Object struct { + ObjectAttrs + Content []byte +} + // MarshalJSON for Object to use ACLRule instead of storage.ACLRule func (o Object) MarshalJSON() ([]byte, error) { temp := struct { BucketName string `json:"bucket"` Name string `json:"name"` + Size int64 `json:"-"` ContentType string `json:"contentType"` ContentEncoding string `json:"contentEncoding"` Content []byte `json:"-"` @@ -85,6 +96,7 @@ func (o *Object) UnmarshalJSON(data []byte) error { temp := struct { BucketName string `json:"bucket"` Name string `json:"name"` + Size int64 `json:"-"` ContentType string `json:"contentType"` ContentEncoding string `json:"contentEncoding"` Content []byte `json:"-"` @@ -193,21 +205,18 @@ func (team *projectTeam) UnmarshalJSON(data []byte) error { team.Team = temp.Team return nil } -func (o *Object) id() string { - return o.BucketName + "/" + o.Name -} -type objectList []Object +type objectAttrsList []ObjectAttrs -func (o objectList) Len() int { +func (o objectAttrsList) Len() int { return len(o) } -func (o objectList) Less(i int, j int) bool { +func (o objectAttrsList) Less(i int, j int) bool { return o[i].Name < o[j].Name } -func (o *objectList) Swap(i int, j int) { +func (o *objectAttrsList) Swap(i int, j int) { d := *o d[i], d[j] = d[j], d[i] } @@ -244,7 +253,7 @@ type ListOptions struct { // or an error if the bucket doesn't exist. // // Deprecated: use ListObjectsWithOptions. 
-func (s *Server) ListObjects(bucketName, prefix, delimiter string, versions bool) ([]Object, []string, error) { +func (s *Server) ListObjects(bucketName, prefix, delimiter string, versions bool) ([]ObjectAttrs, []string, error) { return s.ListObjectsWithOptions(bucketName, ListOptions{ Prefix: prefix, Delimiter: delimiter, @@ -252,15 +261,15 @@ func (s *Server) ListObjects(bucketName, prefix, delimiter string, versions bool }) } -func (s *Server) ListObjectsWithOptions(bucketName string, options ListOptions) ([]Object, []string, error) { +func (s *Server) ListObjectsWithOptions(bucketName string, options ListOptions) ([]ObjectAttrs, []string, error) { backendObjects, err := s.backend.ListObjects(bucketName, options.Versions) if err != nil { return nil, nil, err } - objects := fromBackendObjects(backendObjects) - olist := objectList(objects) + objects := fromBackendObjectsAttrs(backendObjects) + olist := objectAttrsList(objects) sort.Sort(&olist) - var respObjects []Object + var respObjects []ObjectAttrs prefixes := make(map[string]bool) for _, obj := range olist { if strings.HasPrefix(obj.Name, options.Prefix) { @@ -309,19 +318,22 @@ func toBackendObjects(objects []Object) []backend.Object { backendObjects := []backend.Object{} for _, o := range objects { backendObjects = append(backendObjects, backend.Object{ - BucketName: o.BucketName, - Name: o.Name, - Content: o.Content, - ContentType: o.ContentType, - ContentEncoding: o.ContentEncoding, - Crc32c: o.Crc32c, - Md5Hash: o.Md5Hash, - ACL: o.ACL, - Created: getCurrentIfZero(o.Created).Format(timestampFormat), - Deleted: o.Deleted.Format(timestampFormat), - Updated: getCurrentIfZero(o.Updated).Format(timestampFormat), - Generation: o.Generation, - Metadata: o.Metadata, + ObjectAttrs: backend.ObjectAttrs{ + BucketName: o.BucketName, + Name: o.Name, + Size: int64(len(o.Content)), + ContentType: o.ContentType, + ContentEncoding: o.ContentEncoding, + Crc32c: o.Crc32c, + Md5Hash: o.Md5Hash, + ACL: o.ACL, + Created: 
getCurrentIfZero(o.Created).Format(timestampFormat), + Deleted: o.Deleted.Format(timestampFormat), + Updated: getCurrentIfZero(o.Updated).Format(timestampFormat), + Generation: o.Generation, + Metadata: o.Metadata, + }, + Content: o.Content, }) } return backendObjects @@ -331,9 +343,34 @@ func fromBackendObjects(objects []backend.Object) []Object { backendObjects := []Object{} for _, o := range objects { backendObjects = append(backendObjects, Object{ + ObjectAttrs: ObjectAttrs{ + BucketName: o.BucketName, + Name: o.Name, + Size: int64(len(o.Content)), + ContentType: o.ContentType, + ContentEncoding: o.ContentEncoding, + Crc32c: o.Crc32c, + Md5Hash: o.Md5Hash, + ACL: o.ACL, + Created: convertTimeWithoutError(o.Created), + Deleted: convertTimeWithoutError(o.Deleted), + Updated: convertTimeWithoutError(o.Updated), + Generation: o.Generation, + Metadata: o.Metadata, + }, + Content: o.Content, + }) + } + return backendObjects +} + +func fromBackendObjectsAttrs(objectAttrs []backend.ObjectAttrs) []ObjectAttrs { + oattrs := []ObjectAttrs{} + for _, o := range objectAttrs { + oattrs = append(oattrs, ObjectAttrs{ BucketName: o.BucketName, Name: o.Name, - Content: o.Content, + Size: o.Size, ContentType: o.ContentType, ContentEncoding: o.ContentEncoding, Crc32c: o.Crc32c, @@ -346,7 +383,7 @@ func fromBackendObjects(objects []backend.Object) []Object { Metadata: o.Metadata, }) } - return backendObjects + return oattrs } func convertTimeWithoutError(t string) time.Time { @@ -430,7 +467,7 @@ func (s *Server) getObject(w http.ResponseWriter, r *http.Request) { header.Set("Accept-Ranges", "bytes") return jsonResponse{ header: header, - data: newObjectResponse(obj), + data: newObjectResponse(obj.ObjectAttrs), } }) @@ -455,7 +492,7 @@ func (s *Server) listObjectACL(r *http.Request) jsonResponse { return jsonResponse{status: http.StatusNotFound} } - return jsonResponse{data: newACLListResponse(obj)} + return jsonResponse{data: newACLListResponse(obj.ObjectAttrs)} } func (s *Server) 
setObjectACL(r *http.Request) jsonResponse { @@ -488,7 +525,7 @@ func (s *Server) setObjectACL(r *http.Request) jsonResponse { s.CreateObject(obj) - return jsonResponse{data: newACLListResponse(obj)} + return jsonResponse{data: newACLListResponse(obj.ObjectAttrs)} } func (s *Server) rewriteObject(r *http.Request) jsonResponse { @@ -523,19 +560,22 @@ func (s *Server) rewriteObject(r *http.Request) jsonResponse { dstBucket := vars["destinationBucket"] newObject := Object{ - BucketName: dstBucket, - Name: vars["destinationObject"], - Content: append([]byte(nil), obj.Content...), - Crc32c: obj.Crc32c, - Md5Hash: obj.Md5Hash, - ACL: obj.ACL, - ContentType: metadata.ContentType, - ContentEncoding: metadata.ContentEncoding, - Metadata: metadata.Metadata, + ObjectAttrs: ObjectAttrs{ + BucketName: dstBucket, + Name: vars["destinationObject"], + Size: int64(len(obj.Content)), + Crc32c: obj.Crc32c, + Md5Hash: obj.Md5Hash, + ACL: obj.ACL, + ContentType: metadata.ContentType, + ContentEncoding: metadata.ContentEncoding, + Metadata: metadata.Metadata, + }, + Content: append([]byte(nil), obj.Content...), } s.CreateObject(newObject) - return jsonResponse{data: newObjectRewriteResponse(newObject)} + return jsonResponse{data: newObjectRewriteResponse(newObject.ObjectAttrs)} } func (s *Server) downloadObject(w http.ResponseWriter, r *http.Request) { @@ -662,5 +702,5 @@ func (s *Server) composeObject(r *http.Request) jsonResponse { obj := fromBackendObjects([]backend.Object{backendObj})[0] - return jsonResponse{data: newObjectResponse(obj)} + return jsonResponse{data: newObjectResponse(obj.ObjectAttrs)} } diff --git a/fakestorage/object_test.go b/fakestorage/object_test.go index 3e87efeee6..b52872ae41 100644 --- a/fakestorage/object_test.go +++ b/fakestorage/object_test.go @@ -54,23 +54,73 @@ func getObjectTestCases() objectTestCases { tests := objectTestCases{ { "object but no creation nor modification date", - Object{BucketName: bucketName, Name: "img/low-res/party-01.jpg", Content: 
[]byte(content), ContentType: contentType, ContentEncoding: contentEncoding, Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), Md5Hash: checksum.EncodedHash(hash)}, + Object{ + Content: []byte(content), ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: "img/low-res/party-01.jpg", + ContentType: contentType, + ContentEncoding: contentEncoding, + Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), + Md5Hash: checksum.EncodedHash(hash), + }, + }, }, { "object with creation and modification dates", - Object{BucketName: bucketName, Name: "img/low-res/party-02.jpg", Content: []byte(content), ContentType: contentType, ContentEncoding: contentEncoding, Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), Md5Hash: checksum.EncodedHash(hash), Created: testInitExecTime, Updated: testInitExecTime}, + Object{ + Content: []byte(content), + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: "img/low-res/party-02.jpg", + ContentType: contentType, + ContentEncoding: contentEncoding, + Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), + Md5Hash: checksum.EncodedHash(hash), + Created: testInitExecTime, + Updated: testInitExecTime, + }, + }, }, { "object with creation, modification dates and generation", - Object{BucketName: bucketName, Name: "img/low-res/party-02.jpg", Content: []byte(content), ContentType: contentType, Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), Md5Hash: checksum.EncodedHash(hash), Created: testInitExecTime, Updated: testInitExecTime, Generation: testInitExecTime.UnixNano()}, + Object{ + Content: []byte(content), + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: "img/low-res/party-02.jpg", + ContentType: contentType, + Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), + Md5Hash: checksum.EncodedHash(hash), + Created: testInitExecTime, + Updated: testInitExecTime, + Generation: testInitExecTime.UnixNano(), + }, + }, }, { "object with everything", - 
Object{BucketName: bucketName, Name: "img/location/meta.jpg", Content: []byte(content), ContentType: contentType, ContentEncoding: contentEncoding, Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), Md5Hash: checksum.EncodedHash(hash), Metadata: map[string]string{"MetaHeader": metaValue}}, + Object{ + Content: []byte(content), + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: "img/location/meta.jpg", + ContentType: contentType, + ContentEncoding: contentEncoding, + Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), + Md5Hash: checksum.EncodedHash(hash), + Metadata: map[string]string{"MetaHeader": metaValue}, + }, + }, }, { "object with no contents neither dates", - Object{BucketName: bucketName, Name: "video/hi-res/best_video_1080p.mp4", ContentType: "text/html; charset=utf-8"}, + Object{ + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: "video/hi-res/best_video_1080p.mp4", + ContentType: "text/html; charset=utf-8", + }, + }, }, } return tests @@ -176,7 +226,10 @@ func TestServerClientObjectAttrsAfterOverwriteWithVersioning(t *testing.T) { metaValue = "MetaValue" ) server.CreateBucketWithOpts(CreateBucketOpts{Name: bucketName, VersioningEnabled: true}) - initialObj := Object{BucketName: bucketName, Name: "img/low-res/party-01.jpg", Content: []byte(content), ContentType: contentType, Crc32c: checksum.EncodedChecksum(uint32ToBytes(uint32Checksum([]byte(content)))), Md5Hash: checksum.EncodedHash(checksum.MD5Hash([]byte(content))), Metadata: map[string]string{"MetaHeader": metaValue}} + initialObj := Object{ + Content: []byte(content), + ObjectAttrs: ObjectAttrs{BucketName: bucketName, Name: "img/low-res/party-01.jpg", ContentType: contentType, Crc32c: checksum.EncodedChecksum(uint32ToBytes(uint32Checksum([]byte(content)))), Md5Hash: checksum.EncodedHash(checksum.MD5Hash([]byte(content))), Metadata: map[string]string{"MetaHeader": metaValue}}, + } server.CreateObject(initialObj) client := server.Client() objHandle := 
client.Bucket(bucketName).Object(initialObj.Name) @@ -190,7 +243,10 @@ func TestServerClientObjectAttrsAfterOverwriteWithVersioning(t *testing.T) { // sleep for at least 100ns or more, so the creation time will differ on all platforms. time.Sleep(time.Microsecond) - latestObjVersion := Object{BucketName: bucketName, Name: "img/low-res/party-01.jpg", Content: []byte(content2), ContentType: contentType, Crc32c: checksum.EncodedChecksum(uint32ToBytes(uint32Checksum([]byte(content2)))), Md5Hash: checksum.EncodedHash(checksum.MD5Hash([]byte(content2)))} + latestObjVersion := Object{ + Content: []byte(content2), + ObjectAttrs: ObjectAttrs{BucketName: bucketName, Name: "img/low-res/party-01.jpg", ContentType: contentType, Crc32c: checksum.EncodedChecksum(uint32ToBytes(uint32Checksum([]byte(content2)))), Md5Hash: checksum.EncodedHash(checksum.MD5Hash([]byte(content2)))}, + } server.CreateObject(latestObjVersion) objHandle = client.Bucket(bucketName).Object(latestObjVersion.Name) latestAttrs, err := objHandle.Attrs(context.TODO()) @@ -230,7 +286,7 @@ func getMetadataHeaderFromAttrs(attrs *storage.ObjectAttrs, headerName string) ( func TestServerClientObjectAttrsErrors(t *testing.T) { objs := []Object{ - {BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}}, } runServersTest(t, objs, func(t *testing.T, server *Server) { @@ -275,10 +331,12 @@ func TestServerClientObjectReader(t *testing.T) { ) objs := []Object{ { - BucketName: bucketName, - Name: objectName, - Content: []byte(content), - ContentType: contentType, + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: objectName, + ContentType: contentType, + }, + Content: []byte(content), }, } @@ -312,10 +370,12 @@ func TestServerClientObjectRangeReader(t *testing.T) { ) objs := []Object{ { - BucketName: bucketName, - Name: objectName, - Content: []byte(content), - ContentType: contentType, + ObjectAttrs: ObjectAttrs{ + 
BucketName: bucketName, + Name: objectName, + ContentType: contentType, + }, + Content: []byte(content), }, } @@ -381,10 +441,12 @@ func TestServerClientObjectReaderAfterCreateObject(t *testing.T) { runServersTest(t, nil, func(t *testing.T, server *Server) { server.CreateObject(Object{ - BucketName: bucketName, - Name: objectName, - Content: []byte(content), - ContentType: contentType, + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: objectName, + ContentType: contentType, + }, + Content: []byte(content), }) client := server.Client() objHandle := client.Bucket(bucketName).Object(objectName) @@ -417,18 +479,22 @@ func TestServerClientObjectReaderAgainstSpecificGenerations(t *testing.T) { runServersTest(t, nil, func(t *testing.T, server *Server) { server.CreateBucketWithOpts(CreateBucketOpts{Name: bucketName, VersioningEnabled: true}) object1 := Object{ - BucketName: bucketName, - Name: objectName, - Content: []byte(content), - ContentType: contentType, - Generation: 1111, + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: objectName, + ContentType: contentType, + Generation: 1111, + }, + Content: []byte(content), } server.CreateObject(object1) object2 := Object{ - BucketName: bucketName, - Name: objectName, - Content: []byte(content + "2"), - ContentType: contentType, + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: objectName, + ContentType: contentType, + }, + Content: []byte(content + "2"), } server.CreateObject(object2) client := server.Client() @@ -462,7 +528,7 @@ func TestServerClientObjectReaderAgainstSpecificGenerations(t *testing.T) { func TestServerClientObjectReaderError(t *testing.T) { objs := []Object{ - {BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}}, } runServersTest(t, objs, func(t *testing.T, server *Server) { @@ -504,7 +570,7 @@ func TestServerClientObjectReadBucketCNAME(t *testing.T) { expectedBody := 
"something" opts := Options{ InitialObjects: []Object{ - {BucketName: "mybucket.mydomain.com", Name: "files/txt/text-01.txt", Content: []byte("something")}, + {ObjectAttrs: ObjectAttrs{BucketName: "mybucket.mydomain.com", Name: "files/txt/text-01.txt"}, Content: []byte("something")}, }, } server, err := NewServerWithOptions(opts) @@ -540,15 +606,15 @@ func TestServerClientObjectReadBucketCNAME(t *testing.T) { func getObjectsForListTests() []Object { return []Object{ - {BucketName: "some-bucket", Name: "img/low-res/party-01.jpg"}, - {BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}, - {BucketName: "some-bucket", Name: "img/low-res/party-02.jpg"}, - {BucketName: "some-bucket", Name: "img/hi-res/party-02.jpg"}, - {BucketName: "some-bucket", Name: "img/low-res/party-03.jpg"}, - {BucketName: "some-bucket", Name: "img/hi-res/party-03.jpg"}, - {BucketName: "some-bucket", Name: "img/brand.jpg"}, - {BucketName: "some-bucket", Name: "video/hi-res/some_video_1080p.mp4"}, - {BucketName: "other-bucket", Name: "static/css/style.css"}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/low-res/party-01.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/low-res/party-02.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-02.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/low-res/party-03.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-03.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/brand.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "video/hi-res/some_video_1080p.mp4"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "other-bucket", Name: "static/css/style.css"}}, } } @@ -948,13 +1014,16 @@ func TestServiceClientRewriteObject(t *testing.T) { hash := checksum.MD5Hash([]byte(content)) objs := []Object{ { 
- BucketName: "first-bucket", - Name: "files/some-file.txt", - Content: []byte(content), - ContentType: contentType, - Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), - Md5Hash: checksum.EncodedHash(hash), - Metadata: map[string]string{"foo": "bar"}, + ObjectAttrs: ObjectAttrs{ + BucketName: "first-bucket", + Name: "files/some-file.txt", + Size: int64(len([]byte(content))), + ContentType: contentType, + Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), + Md5Hash: checksum.EncodedHash(hash), + Metadata: map[string]string{"foo": "bar"}, + }, + Content: []byte(content), }, } @@ -1053,21 +1122,25 @@ func TestServiceClientRewriteObjectWithGenerations(t *testing.T) { ) objs := []Object{ { - BucketName: "first-bucket", - Name: "files/some-file.txt", - Content: []byte(overwrittenContent), - ContentType: contentType, - Crc32c: checksum.EncodedChecksum(uint32ToBytes(uint32Checksum([]byte(overwrittenContent)))), - Md5Hash: checksum.EncodedHash(checksum.MD5Hash([]byte(overwrittenContent))), - Generation: overwrittenGeneration, + ObjectAttrs: ObjectAttrs{ + BucketName: "first-bucket", + Name: "files/some-file.txt", + ContentType: contentType, + Crc32c: checksum.EncodedChecksum(uint32ToBytes(uint32Checksum([]byte(overwrittenContent)))), + Md5Hash: checksum.EncodedHash(checksum.MD5Hash([]byte(overwrittenContent))), + Generation: overwrittenGeneration, + }, + Content: []byte(overwrittenContent), }, { - BucketName: "first-bucket", - Name: "files/some-file.txt", - Content: []byte(latestContent), - ContentType: contentType, - Crc32c: checksum.EncodedChecksum(uint32ToBytes(uint32Checksum([]byte(latestContent)))), - Md5Hash: checksum.EncodedHash(checksum.MD5Hash([]byte(latestContent))), + ObjectAttrs: ObjectAttrs{ + BucketName: "first-bucket", + Name: "files/some-file.txt", + ContentType: contentType, + Crc32c: checksum.EncodedChecksum(uint32ToBytes(uint32Checksum([]byte(latestContent)))), + Md5Hash: 
checksum.EncodedHash(checksum.MD5Hash([]byte(latestContent))), + }, + Content: []byte(latestContent), }, } tests := []struct { @@ -1176,7 +1249,7 @@ func TestServerClientObjectDelete(t *testing.T) { content = "some nice content" ) objs := []Object{ - {BucketName: bucketName, Name: objectName, Content: []byte(content)}, + {ObjectAttrs: ObjectAttrs{BucketName: bucketName, Name: objectName}, Content: []byte(content)}, } runServersTest(t, objs, func(t *testing.T, server *Server) { @@ -1194,7 +1267,7 @@ func TestServerClientObjectDelete(t *testing.T) { } func TestServerClientObjectDeleteWithVersioning(t *testing.T) { - obj := Object{BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg", Content: []byte("some nice content"), Generation: 123} + obj := Object{ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg", Generation: 123}, Content: []byte("some nice content")} runServersTest(t, nil, func(t *testing.T, server *Server) { server.CreateBucketWithOpts(CreateBucketOpts{Name: obj.BucketName, VersioningEnabled: true}) @@ -1222,7 +1295,7 @@ func TestServerClientObjectDeleteWithVersioning(t *testing.T) { func TestServerClientObjectDeleteErrors(t *testing.T) { objs := []Object{ - {BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}}, } runServersTest(t, objs, func(t *testing.T, server *Server) { @@ -1257,7 +1330,7 @@ func TestServerClientObjectDeleteErrors(t *testing.T) { func TestServerClientObjectSetAclPrivate(t *testing.T) { objs := []Object{ - {BucketName: "some-bucket", Name: "img/public-to-private.jpg"}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/public-to-private.jpg"}}, } runServersTest(t, objs, func(t *testing.T, server *Server) { @@ -1303,10 +1376,12 @@ func TestServerClientObjectPatchMetadata(t *testing.T) { ) objs := []Object{ { - BucketName: bucketName, - Name: objectName, - Content: []byte(content), - 
ContentType: contentType, + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: objectName, + ContentType: contentType, + }, + Content: []byte(content), }, } runServersTest(t, objs, func(t *testing.T, server *Server) { @@ -1365,10 +1440,12 @@ func TestParseRangeRequest(t *testing.T) { srv, _ := NewServerWithOptions(Options{ InitialObjects: []Object{ { - BucketName: "test-bucket", - Name: "test-object", - ContentType: "text/plain", - Content: in, + ObjectAttrs: ObjectAttrs{ + BucketName: "test-bucket", + Name: "test-object", + ContentType: "text/plain", + }, + Content: in, }, }, NoListener: true, @@ -1424,40 +1501,48 @@ func TestServiceClientComposeObject(t *testing.T) { objs := []Object{ { - BucketName: "first-bucket", - Name: "files/source1.txt", - Content: []byte(source1Content), - ContentType: contentType, - Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), - Md5Hash: checksum.EncodedHash(hash), - Metadata: map[string]string{"foo": "bar"}, + ObjectAttrs: ObjectAttrs{ + BucketName: "first-bucket", + Name: "files/source1.txt", + ContentType: contentType, + Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), + Md5Hash: checksum.EncodedHash(hash), + Metadata: map[string]string{"foo": "bar"}, + }, + Content: []byte(source1Content), }, { - BucketName: "first-bucket", - Name: "files/source2.txt", - Content: []byte(source2Content), - ContentType: contentType, - Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), - Md5Hash: checksum.EncodedHash(hash), - Metadata: map[string]string{"foo": "bar"}, + ObjectAttrs: ObjectAttrs{ + BucketName: "first-bucket", + Name: "files/source2.txt", + ContentType: contentType, + Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), + Md5Hash: checksum.EncodedHash(hash), + Metadata: map[string]string{"foo": "bar"}, + }, + Content: []byte(source2Content), }, { - BucketName: "first-bucket", - Name: "files/source3.txt", - Content: []byte(source3Content), - ContentType: contentType, - Crc32c: 
checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), - Md5Hash: checksum.EncodedHash(hash), - Metadata: map[string]string{"foo": "bar"}, + ObjectAttrs: ObjectAttrs{ + BucketName: "first-bucket", + Name: "files/source3.txt", + ContentType: contentType, + Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), + Md5Hash: checksum.EncodedHash(hash), + Metadata: map[string]string{"foo": "bar"}, + }, + Content: []byte(source3Content), }, { - BucketName: "first-bucket", - Name: "files/destination.txt", - Content: []byte("test"), - ContentType: contentType, - Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), - Md5Hash: checksum.EncodedHash(hash), - Metadata: map[string]string{"foo": "bar"}, + ObjectAttrs: ObjectAttrs{ + BucketName: "first-bucket", + Name: "files/destination.txt", + ContentType: contentType, + Crc32c: checksum.EncodedChecksum(uint32ToBytes(u32Checksum)), + Md5Hash: checksum.EncodedHash(hash), + Metadata: map[string]string{"foo": "bar"}, + }, + Content: []byte("test"), }, } diff --git a/fakestorage/response.go b/fakestorage/response.go index a8456e4624..fe46bde8d5 100644 --- a/fakestorage/response.go +++ b/fakestorage/response.go @@ -47,7 +47,7 @@ func newBucketResponse(bucket backend.Bucket) bucketResponse { } } -func newListObjectsResponse(objs []Object, prefixes []string) listResponse { +func newListObjectsResponse(objs []ObjectAttrs, prefixes []string) listResponse { resp := listResponse{ Kind: "storage#objects", Items: make([]interface{}, len(objs)), @@ -98,7 +98,7 @@ type objectResponse struct { Metadata map[string]string `json:"metadata,omitempty"` } -func newObjectResponse(obj Object) objectResponse { +func newObjectResponse(obj ObjectAttrs) objectResponse { acl := getAccessControlsListFromObject(obj) return objectResponse{ @@ -106,7 +106,7 @@ func newObjectResponse(obj Object) objectResponse { ID: obj.id(), Bucket: obj.BucketName, Name: obj.Name, - Size: int64(len(obj.Content)), + Size: obj.Size, ContentType: obj.ContentType, 
ContentEncoding: obj.ContentEncoding, Crc32c: obj.Crc32c, @@ -124,14 +124,14 @@ type aclListResponse struct { Items []*objectAccessControl `json:"items"` } -func newACLListResponse(obj Object) aclListResponse { +func newACLListResponse(obj ObjectAttrs) aclListResponse { if len(obj.ACL) == 0 { return aclListResponse{} } return aclListResponse{Items: getAccessControlsListFromObject(obj)} } -func getAccessControlsListFromObject(obj Object) []*objectAccessControl { +func getAccessControlsListFromObject(obj ObjectAttrs) []*objectAccessControl { aclItems := make([]*objectAccessControl, len(obj.ACL)) for idx, aclRule := range obj.ACL { aclItems[idx] = &objectAccessControl{ @@ -153,11 +153,11 @@ type rewriteResponse struct { Resource objectResponse `json:"resource"` } -func newObjectRewriteResponse(obj Object) rewriteResponse { +func newObjectRewriteResponse(obj ObjectAttrs) rewriteResponse { return rewriteResponse{ Kind: "storage#rewriteResponse", - TotalBytesRewritten: int64(len(obj.Content)), - ObjectSize: int64(len(obj.Content)), + TotalBytesRewritten: obj.Size, + ObjectSize: obj.Size, Done: true, RewriteToken: "", Resource: newObjectResponse(obj), diff --git a/fakestorage/server_test.go b/fakestorage/server_test.go index a26e1c076f..2a34a42b23 100644 --- a/fakestorage/server_test.go +++ b/fakestorage/server_test.go @@ -16,10 +16,10 @@ import ( func TestNewServer(t *testing.T) { t.Parallel() server := NewServer([]Object{ - {BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}, - {BucketName: "some-bucket", Name: "img/hi-res/party-02.jpg"}, - {BucketName: "some-bucket", Name: "img/hi-res/party-03.jpg"}, - {BucketName: "other-bucket", Name: "static/css/website.css"}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-01.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-02.jpg"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-03.jpg"}}, + {ObjectAttrs: 
ObjectAttrs{BucketName: "other-bucket", Name: "static/css/website.css"}}, }) defer server.Stop() url := server.URL() @@ -135,10 +135,10 @@ func TestPublicURL(t *testing.T) { func TestDownloadObject(t *testing.T) { objs := []Object{ - {BucketName: "some-bucket", Name: "files/txt/text-01.txt", Content: []byte("something")}, - {BucketName: "some-bucket", Name: "files/txt/text-02.txt"}, - {BucketName: "some-bucket", Name: "files/txt/text-03.txt"}, - {BucketName: "other-bucket", Name: "static/css/website.css", Content: []byte("body {display: none;}")}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "files/txt/text-01.txt"}, Content: []byte("something")}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "files/txt/text-02.txt"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "files/txt/text-03.txt"}}, + {ObjectAttrs: ObjectAttrs{BucketName: "other-bucket", Name: "static/css/website.css"}, Content: []byte("body {display: none;}")}, } runServersTest(t, objs, testDownloadObject) runServersTest(t, objs, testDownloadObjectRange) @@ -302,8 +302,8 @@ func TestDownloadObjectAlternatePublicHost(t *testing.T) { }, } objs := []Object{ - {BucketName: "some-bucket", Name: "files/txt/text-01.txt", Content: []byte("something")}, - {BucketName: "other-bucket", Name: "static/css/website.css", Content: []byte("body {display: none;}")}, + {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "files/txt/text-01.txt"}, Content: []byte("something")}, + {ObjectAttrs: ObjectAttrs{BucketName: "other-bucket", Name: "static/css/website.css"}, Content: []byte("body {display: none;}")}, } opts := Options{ InitialObjects: objs, diff --git a/fakestorage/upload.go b/fakestorage/upload.go index 511f856299..ab50369279 100644 --- a/fakestorage/upload.go +++ b/fakestorage/upload.go @@ -128,15 +128,17 @@ func (s *Server) insertFormObject(r *http.Request) xmlResponse { return xmlResponse{errorMessage: err.Error()} } obj := Object{ - BucketName: bucketName, - 
Name: name, - Content: data, - ContentType: contentType, - ContentEncoding: contentEncoding, - Crc32c: checksum.EncodedCrc32cChecksum(data), - Md5Hash: checksum.EncodedMd5Hash(data), - ACL: getObjectACL(predefinedACL), - Metadata: metaData, + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: name, + ContentType: contentType, + ContentEncoding: contentEncoding, + Crc32c: checksum.EncodedCrc32cChecksum(data), + Md5Hash: checksum.EncodedMd5Hash(data), + ACL: getObjectACL(predefinedACL), + Metadata: metaData, + }, + Content: data, } obj, err = s.createObject(obj) if err != nil { @@ -181,14 +183,16 @@ func (s *Server) simpleUpload(bucketName string, r *http.Request) jsonResponse { return jsonResponse{errorMessage: err.Error()} } obj := Object{ - BucketName: bucketName, - Name: name, - Content: data, - ContentType: r.Header.Get(contentTypeHeader), - ContentEncoding: contentEncoding, - Crc32c: checksum.EncodedCrc32cChecksum(data), - Md5Hash: checksum.EncodedMd5Hash(data), - ACL: getObjectACL(predefinedACL), + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: name, + ContentType: r.Header.Get(contentTypeHeader), + ContentEncoding: contentEncoding, + Crc32c: checksum.EncodedCrc32cChecksum(data), + Md5Hash: checksum.EncodedMd5Hash(data), + ACL: getObjectACL(predefinedACL), + }, + Content: data, } obj, err = s.createObject(obj) if err != nil { @@ -221,15 +225,17 @@ func (s *Server) signedUpload(bucketName string, r *http.Request) jsonResponse { return jsonResponse{errorMessage: err.Error()} } obj := Object{ - BucketName: bucketName, - Name: name, - Content: data, - ContentType: r.Header.Get(contentTypeHeader), - ContentEncoding: contentEncoding, - Crc32c: checksum.EncodedCrc32cChecksum(data), - Md5Hash: checksum.EncodedMd5Hash(data), - ACL: getObjectACL(predefinedACL), - Metadata: metaData, + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: name, + ContentType: r.Header.Get(contentTypeHeader), + ContentEncoding: contentEncoding, + Crc32c: 
checksum.EncodedCrc32cChecksum(data), + Md5Hash: checksum.EncodedMd5Hash(data), + ACL: getObjectACL(predefinedACL), + Metadata: metaData, + }, + Content: data, } obj, err = s.createObject(obj) if err != nil { @@ -299,15 +305,17 @@ func (s *Server) multipartUpload(bucketName string, r *http.Request) jsonRespons } obj := Object{ - BucketName: bucketName, - Name: objName, - Content: content, - ContentType: contentType, - ContentEncoding: metadata.ContentEncoding, - Crc32c: checksum.EncodedCrc32cChecksum(content), - Md5Hash: checksum.EncodedMd5Hash(content), - ACL: getObjectACL(predefinedACL), - Metadata: metadata.Metadata, + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: objName, + ContentType: contentType, + ContentEncoding: metadata.ContentEncoding, + Crc32c: checksum.EncodedCrc32cChecksum(content), + Md5Hash: checksum.EncodedMd5Hash(content), + ACL: getObjectACL(predefinedACL), + Metadata: metadata.Metadata, + }, + Content: content, } obj, err = s.createObject(obj) if err != nil { @@ -328,11 +336,13 @@ func (s *Server) resumableUpload(bucketName string, r *http.Request) jsonRespons objName = metadata.Name } obj := Object{ - BucketName: bucketName, - Name: objName, - ContentEncoding: contentEncoding, - ACL: getObjectACL(predefinedACL), - Metadata: metadata.Metadata, + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: objName, + ContentEncoding: contentEncoding, + ACL: getObjectACL(predefinedACL), + Metadata: metadata.Metadata, + }, } uploadID, err := generateUploadID() if err != nil { diff --git a/fakestorage/upload_test.go b/fakestorage/upload_test.go index c431d2e89c..8c3a0f2127 100644 --- a/fakestorage/upload_test.go +++ b/fakestorage/upload_test.go @@ -132,10 +132,12 @@ func TestServerClientObjectWriterOverwrite(t *testing.T) { const content = "other content" const contentType = "text/plain" server.CreateObject(Object{ - BucketName: "some-bucket", - Name: "some-object.txt", - Content: []byte("some content"), - ContentType: "some-stff", + 
ObjectAttrs: ObjectAttrs{ + BucketName: "some-bucket", + Name: "some-object.txt", + ContentType: "some-stff", + }, + Content: []byte("some content"), }) objHandle := server.Client().Bucket("some-bucket").Object("some-object.txt") w := objHandle.NewWriter(context.Background()) @@ -324,7 +326,7 @@ func TestServerClientSignedUploadBucketCNAME(t *testing.T) { expectedHash := "bHupxaFBQh4cA8uYB8l8dA==" opts := Options{ InitialObjects: []Object{ - {BucketName: "mybucket.mydomain.com", Name: "files/txt/text-01.txt", Content: []byte("something")}, + {ObjectAttrs: ObjectAttrs{BucketName: "mybucket.mydomain.com", Name: "files/txt/text-01.txt"}, Content: []byte("something")}, }, } server, err := NewServerWithOptions(opts) diff --git a/internal/backend/backend_test.go b/internal/backend/backend_test.go index da3ffb65d2..b960ca7d53 100644 --- a/internal/backend/backend_test.go +++ b/internal/backend/backend_test.go @@ -116,12 +116,27 @@ func TestObjectCRUD(t *testing.T) { return } - initialObject := Object{BucketName: bucketName, Name: objectName, Content: content1, Crc32c: crc1, Md5Hash: md51} + initialObject := Object{ + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: objectName, + Crc32c: crc1, + Md5Hash: md51, + }, + Content: content1, + } t.Logf("create an initial object on an empty bucket with versioning %t", versioningEnabled) initialGeneration := uploadAndCompare(t, storage, initialObject) t.Logf("create (update) in existent case with explicit generation and versioning %t", versioningEnabled) - secondVersionWithGeneration := Object{BucketName: bucketName, Name: objectName, Content: content2, Generation: 1234} + secondVersionWithGeneration := Object{ + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: objectName, + Generation: 1234, + }, + Content: content2, + } uploadAndCompare(t, storage, secondVersionWithGeneration) initialObjectFromGeneration, err := storage.GetObjectWithGeneration(initialObject.BucketName, initialObject.Name, initialGeneration) 
@@ -184,7 +199,13 @@ func TestObjectQueryErrors(t *testing.T) { shouldError(t, err) return } - validObject := Object{BucketName: bucketName, Name: "random-object", Content: []byte("random-content")} + validObject := Object{ + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: "random-object", + }, + Content: []byte("random-content"), + } _, err = storage.CreateObject(validObject) noError(t, err) _, err = storage.GetObjectWithGeneration(validObject.BucketName, validObject.Name, 33333) diff --git a/internal/backend/fs.go b/internal/backend/fs.go index fb47e6ab86..1364a47a78 100644 --- a/internal/backend/fs.go +++ b/internal/backend/fs.go @@ -144,7 +144,7 @@ func (s *storageFS) CreateObject(obj Object) (Object, error) { // ListObjects lists the objects in a given bucket with a given prefix and // delimeter. -func (s *storageFS) ListObjects(bucketName string, versions bool) ([]Object, error) { +func (s *storageFS) ListObjects(bucketName string, versions bool) ([]ObjectAttrs, error) { s.mtx.RLock() defer s.mtx.RUnlock() @@ -152,7 +152,7 @@ func (s *storageFS) ListObjects(bucketName string, versions bool) ([]Object, err if err != nil { return nil, err } - objects := []Object{} + objects := []ObjectAttrs{} for _, info := range infos { unescaped, err := url.PathUnescape(info.Name()) if err != nil { @@ -162,7 +162,8 @@ func (s *storageFS) ListObjects(bucketName string, versions bool) ([]Object, err if err != nil { return nil, err } - objects = append(objects, object) + object.Size = int64(len(object.Content)) + objects = append(objects, object.ObjectAttrs) } return objects, nil } @@ -192,6 +193,7 @@ func (s *storageFS) getObject(bucketName, objectName string) (Object, error) { } obj.Name = filepath.ToSlash(objectName) obj.BucketName = bucketName + obj.Size = int64(len(obj.Content)) return obj, nil } @@ -233,12 +235,15 @@ func (s *storageFS) ComposeObject(bucketName string, objectNames []string, desti dest, err := s.GetObject(bucketName, destinationName) if err != 
nil { - dest = Object{ + oattrs := ObjectAttrs{ BucketName: bucketName, Name: destinationName, ContentType: contentType, Created: time.Now().String(), } + dest = Object{ + ObjectAttrs: oattrs, + } } dest.Content = data diff --git a/internal/backend/memory.go b/internal/backend/memory.go index fb5635c8a4..2084009216 100644 --- a/internal/backend/memory.go +++ b/internal/backend/memory.go @@ -35,6 +35,7 @@ func newBucketInMemory(name string, versioningEnabled bool) bucketInMemory { } func (bm *bucketInMemory) addObject(obj Object) Object { + obj.Size = int64(len(obj.Content)) obj.Generation = getNewGenerationIfZero(obj.Generation) index := findObject(obj, bm.activeObjects, false) if index >= 0 { @@ -193,17 +194,26 @@ func (s *storageMemory) CreateObject(obj Object) (Object, error) { // ListObjects lists the objects in a given bucket with a given prefix and // delimeter. -func (s *storageMemory) ListObjects(bucketName string, versions bool) ([]Object, error) { +func (s *storageMemory) ListObjects(bucketName string, versions bool) ([]ObjectAttrs, error) { s.mtx.RLock() defer s.mtx.RUnlock() bucketInMemory, err := s.getBucketInMemory(bucketName) if err != nil { - return []Object{}, err + return []ObjectAttrs{}, err + } + objAttrs := make([]ObjectAttrs, 0, len(bucketInMemory.activeObjects)) + for _, obj := range bucketInMemory.activeObjects { + objAttrs = append(objAttrs, obj.ObjectAttrs) } if !versions { - return bucketInMemory.activeObjects, nil + return objAttrs, nil + } + + archvObjs := make([]ObjectAttrs, 0, len(bucketInMemory.archivedObjects)) + for _, obj := range bucketInMemory.archivedObjects { + archvObjs = append(archvObjs, obj.ObjectAttrs) } - return append(bucketInMemory.activeObjects, bucketInMemory.archivedObjects...), nil + return append(objAttrs, archvObjs...), nil } func (s *storageMemory) GetObject(bucketName, objectName string) (Object, error) { @@ -219,7 +229,7 @@ func (s *storageMemory) GetObjectWithGeneration(bucketName, objectName string, g return 
Object{}, err } matchGeneration := false - obj := Object{BucketName: bucketName, Name: objectName} + obj := Object{ObjectAttrs: ObjectAttrs{BucketName: bucketName, Name: objectName}} listToConsider := bucketInMemory.activeObjects if generation != 0 { matchGeneration = true @@ -230,6 +240,7 @@ func (s *storageMemory) GetObjectWithGeneration(bucketName, objectName string, g if index < 0 { return obj, errors.New("object not found") } + return listToConsider[index], nil } @@ -278,10 +289,12 @@ func (s *storageMemory) ComposeObject(bucketName string, objectNames []string, d dest, err := s.GetObject(bucketName, destinationName) if err != nil { dest = Object{ - BucketName: bucketName, - Name: destinationName, - ContentType: contentType, - Created: time.Now().String(), + ObjectAttrs: ObjectAttrs{ + BucketName: bucketName, + Name: destinationName, + ContentType: contentType, + Created: time.Now().String(), + }, } } diff --git a/internal/backend/object.go b/internal/backend/object.go index 93185a17aa..7fc2e560a8 100644 --- a/internal/backend/object.go +++ b/internal/backend/object.go @@ -10,13 +10,13 @@ import ( "cloud.google.com/go/storage" ) -// Object represents the object that is stored within the fake server. -type Object struct { +// ObjectAttrs represents the meta-data without its contents. +type ObjectAttrs struct { BucketName string `json:"-"` Name string `json:"-"` + Size int64 `json:"-"` ContentType string ContentEncoding string - Content []byte Crc32c string Md5Hash string ACL []storage.ACLRule @@ -28,11 +28,17 @@ type Object struct { } // ID is used for comparing objects. -func (o *Object) ID() string { +func (o *ObjectAttrs) ID() string { return fmt.Sprintf("%s#%d", o.IDNoGen(), o.Generation) } // IDNoGen does not consider the generation field. -func (o *Object) IDNoGen() string { +func (o *ObjectAttrs) IDNoGen() string { return fmt.Sprintf("%s/%s", o.BucketName, o.Name) } + +// Object represents the object that is stored within the fake server. 
+type Object struct { + ObjectAttrs + Content []byte +} diff --git a/internal/backend/storage.go b/internal/backend/storage.go index f69d82a4c6..025fffed3d 100644 --- a/internal/backend/storage.go +++ b/internal/backend/storage.go @@ -13,7 +13,7 @@ type Storage interface { GetBucket(name string) (Bucket, error) DeleteBucket(name string) error CreateObject(obj Object) (Object, error) - ListObjects(bucketName string, versions bool) ([]Object, error) + ListObjects(bucketName string, versions bool) ([]ObjectAttrs, error) GetObject(bucketName, objectName string) (Object, error) GetObjectWithGeneration(bucketName, objectName string, generation int64) (Object, error) DeleteObject(bucketName, objectName string) error diff --git a/main.go b/main.go index 652acef91f..221b50f7e6 100644 --- a/main.go +++ b/main.go @@ -92,12 +92,14 @@ func objectsFromBucket(localBucketPath, bucketName string) ([]fakestorage.Object return fmt.Errorf("could not read file %q: %w", path, err) } objects = append(objects, fakestorage.Object{ - BucketName: bucketName, - Name: objectKey, - ContentType: mime.TypeByExtension(filepath.Ext(path)), - Content: fileContent, - Crc32c: checksum.EncodedCrc32cChecksum(fileContent), - Md5Hash: checksum.EncodedMd5Hash(fileContent), + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: bucketName, + Name: objectKey, + ContentType: mime.TypeByExtension(filepath.Ext(path)), + Crc32c: checksum.EncodedCrc32cChecksum(fileContent), + Md5Hash: checksum.EncodedMd5Hash(fileContent), + }, + Content: fileContent, }) } return nil diff --git a/main_test.go b/main_test.go index 3889f4a2b9..256552c991 100644 --- a/main_test.go +++ b/main_test.go @@ -48,10 +48,12 @@ func TestGenerateObjectsFromFiles(t *testing.T) { folder: "testdata/basic", expectedObjects: []fakestorage.Object{ { - BucketName: "sample-bucket", - Name: "some_file.txt", - Content: []byte("Some amazing content to be loaded"), - ContentType: testContentType, + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: 
"sample-bucket", + Name: "some_file.txt", + ContentType: testContentType, + }, + Content: []byte("Some amazing content to be loaded"), }, }, expectedEmptyBuckets: []string{"empty-bucket"}, @@ -61,22 +63,28 @@ func TestGenerateObjectsFromFiles(t *testing.T) { folder: "testdata/multi-level", expectedObjects: []fakestorage.Object{ { - BucketName: "some-bucket", - Name: "a/b/c/d/e/f/object1.txt", - Content: []byte("this is object 1\n"), - ContentType: testContentType, + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "some-bucket", + Name: "a/b/c/d/e/f/object1.txt", + ContentType: testContentType, + }, + Content: []byte("this is object 1\n"), }, { - BucketName: "some-bucket", - Name: "a/b/c/d/e/f/object2.txt", - Content: []byte("this is object 2\n"), - ContentType: testContentType, + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "some-bucket", + Name: "a/b/c/d/e/f/object2.txt", + ContentType: testContentType, + }, + Content: []byte("this is object 2\n"), }, { - BucketName: "some-bucket", - Name: "root-object.txt", - Content: []byte("r00t\n"), - ContentType: testContentType, + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "some-bucket", + Name: "root-object.txt", + ContentType: testContentType, + }, + Content: []byte("r00t\n"), }, }, }, @@ -93,28 +101,36 @@ func TestGenerateObjectsFromFiles(t *testing.T) { folder: "testdata/chaos", expectedObjects: []fakestorage.Object{ { - BucketName: "bucket1", - Name: "object1.txt", - Content: []byte("object 1\n"), - ContentType: testContentType, + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "bucket1", + Name: "object1.txt", + ContentType: testContentType, + }, + Content: []byte("object 1\n"), }, { - BucketName: "bucket1", - Name: "object2.txt", - Content: []byte("object 2\n"), - ContentType: testContentType, + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "bucket1", + Name: "object2.txt", + ContentType: testContentType, + }, + Content: []byte("object 2\n"), }, { - BucketName: "bucket2", - Name: 
"object1.txt", - Content: []byte("object 1\n"), - ContentType: testContentType, + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "bucket2", + Name: "object1.txt", + ContentType: testContentType, + }, + Content: []byte("object 1\n"), }, { - BucketName: "bucket2", - Name: "object2.txt", - Content: []byte("object 2\n"), - ContentType: testContentType, + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "bucket2", + Name: "object2.txt", + ContentType: testContentType, + }, + Content: []byte("object 2\n"), }, }, }, From f2340d3198a311b0cd67e48b4c95caddefb49b34 Mon Sep 17 00:00:00 2001 From: Doychin Atanasov Date: Fri, 13 Aug 2021 17:44:52 +0300 Subject: [PATCH 2/2] List objects: filter files by prefix before reading them Previously all files were read from a bucket before some of them were dropped after the prefix test. This is extremely inefficient in light of the fact that all files are actually read into memory for the file system bucket. A moderately large bucket will cause listing to take many minutes even when eventually only a few results are returned.
--- fakestorage/object.go | 27 ++++++++++++++------------- internal/backend/backend_test.go | 4 ++-- internal/backend/fs.go | 7 +++++-- internal/backend/memory.go | 11 +++++++++-- internal/backend/storage.go | 2 +- 5 files changed, 31 insertions(+), 20 deletions(-) diff --git a/fakestorage/object.go b/fakestorage/object.go index a62593b091..bf8ca6666a 100644 --- a/fakestorage/object.go +++ b/fakestorage/object.go @@ -262,7 +262,7 @@ func (s *Server) ListObjects(bucketName, prefix, delimiter string, versions bool } func (s *Server) ListObjectsWithOptions(bucketName string, options ListOptions) ([]ObjectAttrs, []string, error) { - backendObjects, err := s.backend.ListObjects(bucketName, options.Versions) + backendObjects, err := s.backend.ListObjects(bucketName, options.Prefix, options.Versions) if err != nil { return nil, nil, err } @@ -272,18 +272,19 @@ func (s *Server) ListObjectsWithOptions(bucketName string, options ListOptions) var respObjects []ObjectAttrs prefixes := make(map[string]bool) for _, obj := range olist { - if strings.HasPrefix(obj.Name, options.Prefix) { - objName := strings.Replace(obj.Name, options.Prefix, "", 1) - delimPos := strings.Index(objName, options.Delimiter) - if options.Delimiter != "" && delimPos > -1 { - prefix := obj.Name[:len(options.Prefix)+delimPos+1] - if isInOffset(prefix, options.StartOffset, options.EndOffset) { - prefixes[prefix] = true - } - } else { - if isInOffset(obj.Name, options.StartOffset, options.EndOffset) { - respObjects = append(respObjects, obj) - } + if !strings.HasPrefix(obj.Name, options.Prefix) { + continue + } + objName := strings.Replace(obj.Name, options.Prefix, "", 1) + delimPos := strings.Index(objName, options.Delimiter) + if options.Delimiter != "" && delimPos > -1 { + prefix := obj.Name[:len(options.Prefix)+delimPos+1] + if isInOffset(prefix, options.StartOffset, options.EndOffset) { + prefixes[prefix] = true + } + } else { + if isInOffset(obj.Name, options.StartOffset, options.EndOffset) { + 
respObjects = append(respObjects, obj) } } } diff --git a/internal/backend/backend_test.go b/internal/backend/backend_test.go index b960ca7d53..efeffe77a7 100644 --- a/internal/backend/backend_test.go +++ b/internal/backend/backend_test.go @@ -150,7 +150,7 @@ func TestObjectCRUD(t *testing.T) { } t.Logf("checking active object is the expected one when versioning is %t", versioningEnabled) - objs, err := storage.ListObjects(bucketName, false) + objs, err := storage.ListObjects(bucketName, "", false) noError(t, err) if len(objs) != 1 { t.Errorf("wrong number of objects returned\nwant 1\ngot %d", len(objs)) @@ -160,7 +160,7 @@ func TestObjectCRUD(t *testing.T) { } t.Logf("checking all object listing is the expected one when versioning is %t", versioningEnabled) - objs, err = storage.ListObjects(bucketName, true) + objs, err = storage.ListObjects(bucketName, "", true) noError(t, err) if versioningEnabled && len(objs) != 2 { t.Errorf("wrong number of objects returned\nwant 2\ngot %d", len(objs)) diff --git a/internal/backend/fs.go b/internal/backend/fs.go index 1364a47a78..b5c057d6c6 100644 --- a/internal/backend/fs.go +++ b/internal/backend/fs.go @@ -111,7 +111,7 @@ func (s *storageFS) GetBucket(name string) (Bucket, error) { // DeleteBucket removes the bucket from the backend. func (s *storageFS) DeleteBucket(name string) error { - objs, err := s.ListObjects(name, false) + objs, err := s.ListObjects(name, "", false) if err != nil { return BucketNotFound } @@ -144,7 +144,7 @@ func (s *storageFS) CreateObject(obj Object) (Object, error) { // ListObjects lists the objects in a given bucket with a given prefix and // delimeter. 
-func (s *storageFS) ListObjects(bucketName string, versions bool) ([]ObjectAttrs, error) { +func (s *storageFS) ListObjects(bucketName string, prefix string, versions bool) ([]ObjectAttrs, error) { s.mtx.RLock() defer s.mtx.RUnlock() @@ -158,6 +158,9 @@ func (s *storageFS) ListObjects(bucketName string, versions bool) ([]ObjectAttrs if err != nil { return nil, fmt.Errorf("failed to unescape object name %s: %w", info.Name(), err) } + if prefix != "" && !strings.HasPrefix(unescaped, prefix) { + continue + } object, err := s.getObject(bucketName, unescaped) if err != nil { return nil, err diff --git a/internal/backend/memory.go b/internal/backend/memory.go index 2084009216..f88e7e2a42 100644 --- a/internal/backend/memory.go +++ b/internal/backend/memory.go @@ -7,6 +7,7 @@ package backend import ( "errors" "fmt" + "strings" "sync" "time" @@ -165,7 +166,7 @@ func (s *storageMemory) getBucketInMemory(name string) (bucketInMemory, error) { // DeleteBucket removes the bucket from the backend. func (s *storageMemory) DeleteBucket(name string) error { - objs, err := s.ListObjects(name, false) + objs, err := s.ListObjects(name, "", false) if err != nil { return BucketNotFound } @@ -194,7 +195,7 @@ func (s *storageMemory) CreateObject(obj Object) (Object, error) { // ListObjects lists the objects in a given bucket with a given prefix and // delimeter. 
-func (s *storageMemory) ListObjects(bucketName string, versions bool) ([]ObjectAttrs, error) { +func (s *storageMemory) ListObjects(bucketName string, prefix string, versions bool) ([]ObjectAttrs, error) { s.mtx.RLock() defer s.mtx.RUnlock() bucketInMemory, err := s.getBucketInMemory(bucketName) @@ -203,6 +204,9 @@ func (s *storageMemory) ListObjects(bucketName string, versions bool) ([]ObjectA } objAttrs := make([]ObjectAttrs, 0, len(bucketInMemory.activeObjects)) for _, obj := range bucketInMemory.activeObjects { + if prefix != "" && !strings.HasPrefix(obj.Name, prefix) { + continue + } objAttrs = append(objAttrs, obj.ObjectAttrs) } if !versions { @@ -211,6 +215,9 @@ func (s *storageMemory) ListObjects(bucketName string, versions bool) ([]ObjectA archvObjs := make([]ObjectAttrs, 0, len(bucketInMemory.archivedObjects)) for _, obj := range bucketInMemory.archivedObjects { + if prefix != "" && !strings.HasPrefix(obj.Name, prefix) { + continue + } archvObjs = append(archvObjs, obj.ObjectAttrs) } return append(objAttrs, archvObjs...), nil diff --git a/internal/backend/storage.go b/internal/backend/storage.go index 025fffed3d..0e54118cf4 100644 --- a/internal/backend/storage.go +++ b/internal/backend/storage.go @@ -13,7 +13,7 @@ type Storage interface { GetBucket(name string) (Bucket, error) DeleteBucket(name string) error CreateObject(obj Object) (Object, error) - ListObjects(bucketName string, versions bool) ([]ObjectAttrs, error) + ListObjects(bucketName string, prefix string, versions bool) ([]ObjectAttrs, error) GetObject(bucketName, objectName string) (Object, error) GetObjectWithGeneration(bucketName, objectName string, generation int64) (Object, error) DeleteObject(bucketName, objectName string) error