From f311d500ed206b2511d613eb9549dc038c58d6f4 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Mon, 4 May 2020 12:14:53 +0200 Subject: [PATCH 1/3] Add custom MD5 hasher option Since there is no cleanup for the API clients using the SIMD MD5 needs to be up to the API users to manage. Provide a function that allows to insert a custom hasher. md5-simd package is only used to provide the interface, by default a wrapper for "crypto/md5" is created, so default performance remains unaffected. --- api-put-object-multipart.go | 1 + api-put-object-streaming.go | 8 ++-- api-put-object.go | 4 +- api.go | 92 +++++++++++++++++++++++++------------ go.mod | 1 + go.sum | 5 ++ utils.go | 12 ++++- 7 files changed, 86 insertions(+), 37 deletions(-) diff --git a/api-put-object-multipart.go b/api-put-object-multipart.go index e5bd37d5d..25ae63333 100644 --- a/api-put-object-multipart.go +++ b/api-put-object-multipart.go @@ -118,6 +118,7 @@ func (c Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obje for k, v := range hashAlgos { v.Write(buf[:length]) hashSums[k] = v.Sum(nil) + v.Close() } // Update progress reader appropriately to the latest offset diff --git a/api-put-object-streaming.go b/api-put-object-streaming.go index 4da4174cd..ceb53421b 100644 --- a/api-put-object-streaming.go +++ b/api-put-object-streaming.go @@ -20,7 +20,6 @@ package minio import ( "bytes" "context" - "crypto/md5" "encoding/base64" "fmt" "io" @@ -287,9 +286,11 @@ func (c Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, bu return 0, rerr } // Calculate md5sum. - hash := md5.New() + hash := c.md5Hasher() hash.Write(buf[:length]) md5Base64 = base64.StdEncoding.EncodeToString(hash.Sum(nil)) + hash.Close() + // Update progress reader appropriately to the latest offset // as we read from the source. hookReader = newHook(bytes.NewReader(buf[:length]), opts.Progress) @@ -393,10 +394,11 @@ func (c Client) putObject(ctx context.Context, bucketName, objectName string, re } // Calculate md5sum. - hash := md5.New() + hash := c.md5Hasher() hash.Write(buf[:length]) md5Base64 = base64.StdEncoding.EncodeToString(hash.Sum(nil)) reader = bytes.NewReader(buf[:length]) + hash.Close() } // Update progress reader appropriately to the latest offset as we diff --git a/api-put-object.go b/api-put-object.go index eabe788da..d4188d580 100644 --- a/api-put-object.go +++ b/api-put-object.go @@ -20,7 +20,6 @@ package minio import ( "bytes" "context" - "crypto/md5" "encoding/base64" "errors" "fmt" @@ -267,9 +266,10 @@ func (c Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketName var md5Base64 string if opts.SendContentMd5 { // Calculate md5sum. - hash := md5.New() + hash := c.md5Hasher() hash.Write(buf[:length]) md5Base64 = base64.StdEncoding.EncodeToString(hash.Sum(nil)) + hash.Close() } // Update progress reader appropriately to the latest offset diff --git a/api.go b/api.go index f9bdbb9cd..7cf2a037e 100644 --- a/api.go +++ b/api.go @@ -23,7 +23,6 @@ import ( "crypto/md5" "errors" "fmt" - "hash" "io" "io/ioutil" "math/rand" @@ -38,13 +37,12 @@ import ( "sync" "time" - "github.com/minio/sha256-simd" - - "golang.org/x/net/publicsuffix" - + md5simd "github.com/minio/md5-simd" "github.com/minio/minio-go/v6/pkg/credentials" "github.com/minio/minio-go/v6/pkg/s3utils" "github.com/minio/minio-go/v6/pkg/signer" + "github.com/minio/sha256-simd" + "golang.org/x/net/publicsuffix" ) // Client implements Amazon S3 compatible methods. @@ -90,6 +88,9 @@ type Client struct { // lookup indicates type of url lookup supported by server. If not specified, // default to Auto. lookup BucketLookupType + + // Factory for MD5 hash functions. + md5Hasher func() md5simd.Hasher } // Options for New method @@ -98,7 +99,9 @@ type Options struct { Secure bool Region string BucketLookup BucketLookupType - // Add future fields here + + // Custom MD5 hasher. Leave nil to use standard. + CustomMD5 func() md5simd.Hasher } // Global constants. @@ -129,8 +132,11 @@ const ( // NewV2 - instantiate minio client with Amazon S3 signature version // '2' compatibility. func NewV2(endpoint string, accessKeyID, secretAccessKey string, secure bool) (*Client, error) { - creds := credentials.NewStaticV2(accessKeyID, secretAccessKey, "") - clnt, err := privateNew(endpoint, creds, secure, "", BucketLookupAuto) + clnt, err := privateNew(endpoint, Options{ + Creds: credentials.NewStaticV2(accessKeyID, secretAccessKey, ""), + Secure: secure, + BucketLookup: BucketLookupAuto, + }) if err != nil { return nil, err } @@ -141,8 +147,11 @@ func NewV2(endpoint string, accessKeyID, secretAccessKey string, secure bool) (* // NewV4 - instantiate minio client with Amazon S3 signature version // '4' compatibility. func NewV4(endpoint string, accessKeyID, secretAccessKey string, secure bool) (*Client, error) { - creds := credentials.NewStaticV4(accessKeyID, secretAccessKey, "") - clnt, err := privateNew(endpoint, creds, secure, "", BucketLookupAuto) + clnt, err := privateNew(endpoint, Options{ + Creds: credentials.NewStaticV4(accessKeyID, secretAccessKey, ""), + Secure: secure, + BucketLookup: BucketLookupAuto, + }) if err != nil { return nil, err } @@ -152,8 +161,11 @@ func NewV4(endpoint string, accessKeyID, secretAccessKey string, secure bool) (* // New - instantiate minio client, adds automatic verification of signature. func New(endpoint, accessKeyID, secretAccessKey string, secure bool) (*Client, error) { - creds := credentials.NewStaticV4(accessKeyID, secretAccessKey, "") - clnt, err := privateNew(endpoint, creds, secure, "", BucketLookupAuto) + clnt, err := privateNew(endpoint, Options{ + Creds: credentials.NewStaticV4(accessKeyID, secretAccessKey, ""), + Secure: secure, + BucketLookup: BucketLookupAuto, + }) if err != nil { return nil, err } @@ -172,7 +184,12 @@ func New(endpoint, accessKeyID, secretAccessKey string, secure bool) (*Client, e // for retrieving credentials from various credentials provider such as // IAM, File, Env etc. func NewWithCredentials(endpoint string, creds *credentials.Credentials, secure bool, region string) (*Client, error) { - return privateNew(endpoint, creds, secure, region, BucketLookupAuto) + return privateNew(endpoint, Options{ + Creds: creds, + Secure: secure, + BucketLookup: BucketLookupAuto, + Region: region, + }) } // NewWithRegion - instantiate minio client, with region configured. Unlike New(), @@ -180,12 +197,20 @@ func NewWithCredentials(endpoint string, creds *credentials.Credentials, secure // Use this function when if your application deals with single region. func NewWithRegion(endpoint, accessKeyID, secretAccessKey string, secure bool, region string) (*Client, error) { creds := credentials.NewStaticV4(accessKeyID, secretAccessKey, "") - return privateNew(endpoint, creds, secure, region, BucketLookupAuto) + return privateNew(endpoint, Options{ + Creds: creds, + Secure: secure, + Region: region, + BucketLookup: BucketLookupAuto, + }) } // NewWithOptions - instantiate minio client with options func NewWithOptions(endpoint string, opts *Options) (*Client, error) { - return privateNew(endpoint, opts.Creds, opts.Secure, opts.Region, opts.BucketLookup) + if opts == nil { + return nil, errors.New("no options provided") + } + return privateNew(endpoint, *opts) } // EndpointURL returns the URL of the S3 endpoint. @@ -277,9 +302,9 @@ func (c *Client) redirectHeaders(req *http.Request, via []*http.Request) error { return nil } -func privateNew(endpoint string, creds *credentials.Credentials, secure bool, region string, lookup BucketLookupType) (*Client, error) { +func privateNew(endpoint string, opts Options) (*Client, error) { // construct endpoint. - endpointURL, err := getEndpointURL(endpoint, secure) + endpointURL, err := getEndpointURL(endpoint, opts.Secure) if err != nil { return nil, err } @@ -295,15 +320,15 @@ func privateNew(endpoint string, creds *credentials.Credentials, secure bool, re clnt := new(Client) // Save the credentials. - clnt.credsProvider = creds + clnt.credsProvider = opts.Creds // Remember whether we are using https or not - clnt.secure = secure + clnt.secure = opts.Secure // Save endpoint URL, user agent for future uses. clnt.endpointURL = endpointURL - transport, err := DefaultTransport(secure) + transport, err := DefaultTransport(opts.Secure) if err != nil { return nil, err } @@ -316,10 +341,10 @@ func privateNew(endpoint string, creds *credentials.Credentials, secure bool, re } // Sets custom region, if region is empty bucket location cache is used automatically. - if region == "" { - region = s3utils.GetRegionFromURL(*clnt.endpointURL) + if opts.Region == "" { + opts.Region = s3utils.GetRegionFromURL(*clnt.endpointURL) } - clnt.region = region + clnt.region = opts.Region // Instantiate bucket location cache. clnt.bucketLocCache = newBucketLocationCache() @@ -327,9 +352,16 @@ func privateNew(endpoint string, creds *credentials.Credentials, secure bool, re // Introduce a new locked random seed. clnt.random = rand.New(&lockedRandSource{src: rand.NewSource(time.Now().UTC().UnixNano())}) + // Add default md5 hasher. + clnt.md5Hasher = opts.CustomMD5 + if clnt.md5Hasher == nil { + clnt.md5Hasher = func() md5simd.Hasher { + return hashWrapper{Hash: md5.New()} + } + } // Sets bucket lookup style, whether server accepts DNS or Path lookup. Default is Auto - determined // by the SDK. When Auto is specified, DNS lookup is used for Amazon/Google cloud endpoints and Path for all other endpoints. - clnt.lookup = lookup + clnt.lookup = opts.BucketLookup // Return. return clnt, nil } @@ -413,22 +445,22 @@ func (c *Client) SetS3TransferAccelerate(accelerateEndpoint string) { // - For signature v4 request if the connection is insecure compute only sha256. // - For signature v4 request if the connection is secure compute only md5. // - For anonymous request compute md5. -func (c *Client) hashMaterials(isMd5Requested bool) (hashAlgos map[string]hash.Hash, hashSums map[string][]byte) { +func (c *Client) hashMaterials(isMd5Requested bool) (hashAlgos map[string]md5simd.Hasher, hashSums map[string][]byte) { hashSums = make(map[string][]byte) - hashAlgos = make(map[string]hash.Hash) + hashAlgos = make(map[string]md5simd.Hasher) if c.overrideSignerType.IsV4() { if c.secure { - hashAlgos["md5"] = md5.New() + hashAlgos["md5"] = c.md5Hasher() } else { - hashAlgos["sha256"] = sha256.New() + hashAlgos["sha256"] = hashWrapper{sha256.New()} } } else { if c.overrideSignerType.IsAnonymous() { - hashAlgos["md5"] = md5.New() + hashAlgos["md5"] = c.md5Hasher() } } if isMd5Requested { - hashAlgos["md5"] = md5.New() + hashAlgos["md5"] = c.md5Hasher() } return hashAlgos, hashSums } diff --git a/go.mod b/go.mod index fe43a735e..d1b08960d 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.12 require ( github.com/dustin/go-humanize v1.0.0 // indirect github.com/json-iterator/go v1.1.9 + github.com/minio/md5-simd v1.0.1 github.com/minio/sha256-simd v0.1.1 github.com/mitchellh/go-homedir v1.1.0 github.com/sirupsen/logrus v1.5.0 // indirect diff --git a/go.sum b/go.sum index 7461ddbf1..a425792fa 100644 --- a/go.sum +++ b/go.sum @@ -10,7 +10,11 @@ github.com/json-iterator/go v1.1.9 h1:9yzud/Ht36ygwatGx56VwCZtlI/2AD15T1X2sjSuGn github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= +github.com/klauspost/cpuid v1.2.3 h1:CCtW0xUnWGVINKvE/WWOYKdsPV6mawAtvQuSl8guwQs= +github.com/klauspost/cpuid v1.2.3/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/minio/md5-simd v1.0.1 h1:tj/FH8APTKxIkOGUX2YGAVJVXXC3AJ5T2SkHoT/dUFI= +github.com/minio/md5-simd v1.0.1/go.mod h1:EhdyA+Dr0guvfyc8d6yrgs9YzHGfaI+YIjA6gt/7mJk= github.com/minio/sha256-simd v0.1.1 h1:5QHSlgo3nt5yKOJrC7W8w7X+NFl8cMPZm96iu8kKUJU= github.com/minio/sha256-simd v0.1.1/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= @@ -21,6 +25,7 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLD github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/remeh/sizedwaitgroup v1.0.0/go.mod h1:3j2R4OIe/SeS6YDhICBy22RWjJC5eNCJ1V+9+NVNYlo= github.com/sirupsen/logrus v1.5.0 h1:1N5EYkVAPEywqZRJd7cwnRtCb6xJx7NH3T3WUTF980Q= github.com/sirupsen/logrus v1.5.0/go.mod h1:+F7Ogzej0PZc/94MaYx/nvG9jOFMD2osvC3s+Squfpo= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM= diff --git a/utils.go b/utils.go index 685e98c70..4605344f6 100644 --- a/utils.go +++ b/utils.go @@ -22,6 +22,7 @@ import ( "encoding/base64" "encoding/hex" "encoding/xml" + "hash" "io" "io/ioutil" "net" @@ -32,9 +33,8 @@ import ( "strings" "time" - "github.com/minio/sha256-simd" - "github.com/minio/minio-go/v6/pkg/s3utils" + "github.com/minio/sha256-simd" ) func trimEtag(etag string) string { @@ -372,3 +372,11 @@ func isAmzHeader(headerKey string) bool { return strings.HasPrefix(key, "x-amz-meta-") || strings.HasPrefix(key, "x-amz-grant-") || key == "x-amz-acl" || isSSEHeader(headerKey) } + +// hashWrapper implements the md5simd.Hasher interface. +type hashWrapper struct { + hash.Hash +} + +// Close does nothing when wrapped. +func (m hashWrapper) Close() {} From df3b762c9d4055339c981d2634acb298e232998f Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 2 Jun 2020 14:32:14 +0200 Subject: [PATCH 2/3] Upgrade md5-simd --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index d1b08960d..03497bccf 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.12 require ( github.com/dustin/go-humanize v1.0.0 // indirect github.com/json-iterator/go v1.1.9 - github.com/minio/md5-simd v1.0.1 + github.com/minio/md5-simd v1.1.0 github.com/minio/sha256-simd v0.1.1 github.com/mitchellh/go-homedir v1.1.0 github.com/sirupsen/logrus v1.5.0 // indirect diff --git a/go.sum b/go.sum index a425792fa..f158252bb 100644 --- a/go.sum +++ b/go.sum @@ -15,6 +15,8 @@ github.com/klauspost/cpuid v1.2.3/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgo github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/minio/md5-simd v1.0.1 h1:tj/FH8APTKxIkOGUX2YGAVJVXXC3AJ5T2SkHoT/dUFI= github.com/minio/md5-simd v1.0.1/go.mod h1:EhdyA+Dr0guvfyc8d6yrgs9YzHGfaI+YIjA6gt/7mJk= +github.com/minio/md5-simd v1.1.0 h1:QPfiOqlZH+Cj9teu0t9b1nTBfPbyTl16Of5MeuShdK4= +github.com/minio/md5-simd v1.1.0/go.mod h1:XpBqgZULrMYD3R+M28PcmP0CkI7PEMzB3U77ZrKZ0Gw= github.com/minio/sha256-simd v0.1.1 h1:5QHSlgo3nt5yKOJrC7W8w7X+NFl8cMPZm96iu8kKUJU= github.com/minio/sha256-simd v0.1.1/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= From 8dcc241fe163809b89e59eecbf93852845e4343c Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 2 Jun 2020 16:02:26 +0200 Subject: [PATCH 3/3] Use stdlib md5 by default. * Allow custom sha256. * Add a pool for sha256 and md5 and reuse them. --- api.go | 20 +++++++++++--------- core_test.go | 3 ++- utils.go | 35 +++++++++++++++++++++++++++++++---- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/api.go b/api.go index 9ac03d89f..22778dafa 100644 --- a/api.go +++ b/api.go @@ -20,7 +20,6 @@ package minio import ( "bytes" "context" - "crypto/md5" "errors" "fmt" "io" @@ -41,7 +40,6 @@ import ( "github.com/minio/minio-go/v6/pkg/credentials" "github.com/minio/minio-go/v6/pkg/s3utils" "github.com/minio/minio-go/v6/pkg/signer" - "github.com/minio/sha256-simd" "golang.org/x/net/publicsuffix" ) @@ -90,7 +88,8 @@ type Client struct { lookup BucketLookupType // Factory for MD5 hash functions. - md5Hasher func() md5simd.Hasher + md5Hasher func() md5simd.Hasher + sha256Hasher func() md5simd.Hasher } // Options for New method @@ -100,8 +99,9 @@ type Options struct { Region string BucketLookup BucketLookupType - // Custom MD5 hasher. Leave nil to use standard. - CustomMD5 func() md5simd.Hasher + // Custom hash routines. Leave nil to use standard. + CustomMD5 func() md5simd.Hasher + CustomSHA256 func() md5simd.Hasher } // Global constants. @@ -354,10 +354,12 @@ func privateNew(endpoint string, opts Options) (*Client, error) { // Add default md5 hasher. clnt.md5Hasher = opts.CustomMD5 + clnt.sha256Hasher = opts.CustomSHA256 if clnt.md5Hasher == nil { - clnt.md5Hasher = func() md5simd.Hasher { - return hashWrapper{Hash: md5.New()} - } + clnt.md5Hasher = newMd5Hasher + } + if clnt.sha256Hasher == nil { + clnt.sha256Hasher = newSHA256Hasher } // Sets bucket lookup style, whether server accepts DNS or Path lookup. Default is Auto - determined // by the SDK. When Auto is specified, DNS lookup is used for Amazon/Google cloud endpoints and Path for all other endpoints. @@ -452,7 +454,7 @@ func (c *Client) hashMaterials(isMd5Requested bool) (hashAlgos map[string]md5sim if c.secure { hashAlgos["md5"] = c.md5Hasher() } else { - hashAlgos["sha256"] = hashWrapper{sha256.New()} + hashAlgos["sha256"] = c.sha256Hasher() } } else { if c.overrideSignerType.IsAnonymous() { diff --git a/core_test.go b/core_test.go index 312efd9e3..ada24605c 100644 --- a/core_test.go +++ b/core_test.go @@ -678,7 +678,8 @@ func TestCorePutObject(t *testing.T) { mustParseBool(os.Getenv(enableSecurity)), ) if err != nil { - t.Fatal("Error:", err) + t.Error("Error:", err) + return } // Enable tracing, write to stderr. diff --git a/utils.go b/utils.go index 75bb0e96b..0214644e4 100644 --- a/utils.go +++ b/utils.go @@ -31,8 +31,10 @@ import ( "regexp" "strconv" "strings" + "sync" "time" + md5simd "github.com/minio/md5-simd" "github.com/minio/minio-go/v6/pkg/s3utils" "github.com/minio/sha256-simd" ) @@ -50,14 +52,16 @@ func xmlDecoder(body io.Reader, v interface{}) error { // sum256 calculate sha256sum for an input byte array, returns hex encoded. func sum256Hex(data []byte) string { - hash := sha256.New() + hash := newSHA256Hasher() + defer hash.Close() hash.Write(data) return hex.EncodeToString(hash.Sum(nil)) } // sumMD5Base64 calculate md5sum for an input byte array, returns base64 encoded. func sumMD5Base64(data []byte) string { - hash := md5.New() + hash := newMd5Hasher() + defer hash.Close() hash.Write(data) return base64.StdEncoding.EncodeToString(hash.Sum(nil)) } @@ -375,10 +379,33 @@ func isAmzHeader(headerKey string) bool { return strings.HasPrefix(key, "x-amz-meta-") || strings.HasPrefix(key, "x-amz-grant-") || key == "x-amz-acl" || isSSEHeader(headerKey) } +var md5Pool = sync.Pool{New: func() interface{} { return md5.New() }} +var sha256Pool = sync.Pool{New: func() interface{} { return sha256.New() }} + +func newMd5Hasher() md5simd.Hasher { + return hashWrapper{Hash: md5Pool.New().(hash.Hash), isMD5: true} +} + +func newSHA256Hasher() md5simd.Hasher { + return hashWrapper{Hash: sha256Pool.New().(hash.Hash), isSHA256: true} +} + // hashWrapper implements the md5simd.Hasher interface. type hashWrapper struct { hash.Hash + isMD5 bool + isSHA256 bool } -// Close does nothing when wrapped. -func (m hashWrapper) Close() {} +// Close will put the hasher back into the pool. +func (m hashWrapper) Close() { + if m.isMD5 && m.Hash != nil { + m.Reset() + md5Pool.Put(m.Hash) + } + if m.isSHA256 && m.Hash != nil { + m.Reset() + sha256Pool.Put(m.Hash) + } + m.Hash = nil +}