Skip to content

Commit

Permalink
feat(storage): make it possible to disable Content-Type sniffing (#9431)
Browse files Browse the repository at this point in the history
As described in https://togithub.com/google/go-cloud/issues/3298#issuecomment-1856821618, we want to disable automatic `Content-Type` detection when inserting an object to Google Cloud Storage (GCS).

Previously it wasn't possible to disable this auto-detection, even though `googleapi.MediaOptions` provides a `ForceEmptyContentType` option (https://togithub.com/googleapis/google-api-go-client/blob/v0.165.0/googleapi/googleapi.go#L283).

We enable this by adding a `Writer` option to set this value.

Closes https://togithub.com/googleapis/google-cloud-go/issues/9430
  • Loading branch information
stanhu authored Feb 23, 2024
1 parent 3814ee3 commit 0676670
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 34 deletions.
3 changes: 3 additions & 0 deletions storage/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,9 @@ type openWriterParams struct {
// attrs - see `Writer.ObjectAttrs`.
// Required.
attrs *ObjectAttrs
// forceEmptyContentType - Disables auto-detect of Content-Type
// Optional.
forceEmptyContentType bool
// conds - see `Writer.o.conds`.
// Optional.
conds *Conditions
Expand Down
30 changes: 16 additions & 14 deletions storage/grpc_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -1529,16 +1529,17 @@ func newGRPCWriter(c *grpcStorageClient, params *openWriterParams, r io.Reader)
}

return &gRPCWriter{
buf: make([]byte, size),
c: c,
ctx: params.ctx,
reader: r,
bucket: params.bucket,
attrs: params.attrs,
conds: params.conds,
encryptionKey: params.encryptionKey,
sendCRC32C: params.sendCRC32C,
chunkSize: params.chunkSize,
buf: make([]byte, size),
c: c,
ctx: params.ctx,
reader: r,
bucket: params.bucket,
attrs: params.attrs,
conds: params.conds,
encryptionKey: params.encryptionKey,
sendCRC32C: params.sendCRC32C,
chunkSize: params.chunkSize,
forceEmptyContentType: params.forceEmptyContentType,
}
}

Expand All @@ -1557,8 +1558,9 @@ type gRPCWriter struct {
encryptionKey []byte
settings *settings

sendCRC32C bool
chunkSize int
sendCRC32C bool
chunkSize int
forceEmptyContentType bool

// The gRPC client-stream used for sending buffers.
stream storagepb.Storage_BidiWriteObjectClient
Expand Down Expand Up @@ -1857,9 +1859,9 @@ func (w *gRPCWriter) writeObjectSpec() (*storagepb.WriteObjectSpec, error) {
// read copies the data in the reader to the given buffer and reports how much
// data was read into the buffer and if there is no more data to read (EOF).
// Furthermore, if the attrs.ContentType is unset, the first bytes of content
// will be sniffed for a matching content type.
// will be sniffed for a matching content type unless forceEmptyContentType is enabled.
func (w *gRPCWriter) read() (int, bool, error) {
if w.attrs.ContentType == "" {
if w.attrs.ContentType == "" && !w.forceEmptyContentType {
w.reader, w.attrs.ContentType = gax.DetermineContentType(w.reader)
}
// Set n to -1 to start the Read loop.
Expand Down
2 changes: 1 addition & 1 deletion storage/http_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -885,7 +885,7 @@ func (c *httpStorageClient) OpenWriter(params *openWriterParams, opts ...storage
mediaOpts := []googleapi.MediaOption{
googleapi.ChunkSize(params.chunkSize),
}
if c := attrs.ContentType; c != "" {
if c := attrs.ContentType; c != "" || params.forceEmptyContentType {
mediaOpts = append(mediaOpts, googleapi.ContentType(c))
}
if params.chunkRetryDeadline != 0 {
Expand Down
34 changes: 27 additions & 7 deletions storage/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2415,8 +2415,9 @@ func TestIntegration_WriterContentType(t *testing.T) {
multiTransportTest(ctx, t, func(t *testing.T, ctx context.Context, bucket, _ string, client *Client) {
obj := client.Bucket(bucket).Object("content")
testCases := []struct {
content string
setType, wantType string
content string
setType, wantType string
forceEmptyContentType bool
}{
{
// Sniffed content type.
Expand All @@ -2438,9 +2439,17 @@ func TestIntegration_WriterContentType(t *testing.T) {
setType: "image/jpeg",
wantType: "image/jpeg",
},
{
// Content type sniffing disabled.
content: "<html><head><title>My first page</title></head></html>",
setType: "",
wantType: "",
forceEmptyContentType: true,
},
}
for i, tt := range testCases {
if err := writeObject(ctx, obj, tt.setType, []byte(tt.content)); err != nil {
writer := newWriter(ctx, obj, tt.setType, tt.forceEmptyContentType)
if err := writeContents(writer, []byte(tt.content)); err != nil {
t.Errorf("writing #%d: %v", i, err)
}
attrs, err := obj.Attrs(ctx)
Expand Down Expand Up @@ -5649,10 +5658,7 @@ func deleteObjectIfExists(o *ObjectHandle, retryOpts ...RetryOption) error {
return nil
}

func writeObject(ctx context.Context, obj *ObjectHandle, contentType string, contents []byte) error {
w := obj.Retryer(WithPolicy(RetryAlways)).NewWriter(ctx)
w.ContentType = contentType

func writeContents(w *Writer, contents []byte) error {
if contents != nil {
if _, err := w.Write(contents); err != nil {
_ = w.Close()
Expand All @@ -5662,6 +5668,20 @@ func writeObject(ctx context.Context, obj *ObjectHandle, contentType string, con
return w.Close()
}

func writeObject(ctx context.Context, obj *ObjectHandle, contentType string, contents []byte) error {
w := newWriter(ctx, obj, contentType, false)

return writeContents(w, contents)
}

func newWriter(ctx context.Context, obj *ObjectHandle, contentType string, forceEmptyContentType bool) *Writer {
w := obj.Retryer(WithPolicy(RetryAlways)).NewWriter(ctx)
w.ContentType = contentType
w.ForceEmptyContentType = forceEmptyContentType

return w
}

func readObject(ctx context.Context, obj *ObjectHandle) ([]byte, error) {
r, err := obj.NewReader(ctx)
if err != nil {
Expand Down
30 changes: 18 additions & 12 deletions storage/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ type Writer struct {
// cancellation.
ChunkRetryDeadline time.Duration

// ForceEmptyContentType is an optional parameter that is used to disable
// auto-detection of Content-Type. By default, if a blank Content-Type
// is provided, then gax.DetermineContentType is called to sniff the type.
ForceEmptyContentType bool

// ProgressFunc can be used to monitor the progress of a large write
// operation. If ProgressFunc is not nil and writing requires multiple
// calls to the underlying service (see
Expand Down Expand Up @@ -180,18 +185,19 @@ func (w *Writer) openWriter() (err error) {
isIdempotent := w.o.conds != nil && (w.o.conds.GenerationMatch >= 0 || w.o.conds.DoesNotExist == true)
opts := makeStorageOpts(isIdempotent, w.o.retry, w.o.userProject)
params := &openWriterParams{
ctx: w.ctx,
chunkSize: w.ChunkSize,
chunkRetryDeadline: w.ChunkRetryDeadline,
bucket: w.o.bucket,
attrs: &w.ObjectAttrs,
conds: w.o.conds,
encryptionKey: w.o.encryptionKey,
sendCRC32C: w.SendCRC32C,
donec: w.donec,
setError: w.error,
progress: w.progress,
setObj: func(o *ObjectAttrs) { w.obj = o },
ctx: w.ctx,
chunkSize: w.ChunkSize,
chunkRetryDeadline: w.ChunkRetryDeadline,
bucket: w.o.bucket,
attrs: &w.ObjectAttrs,
conds: w.o.conds,
encryptionKey: w.o.encryptionKey,
sendCRC32C: w.SendCRC32C,
donec: w.donec,
setError: w.error,
progress: w.progress,
setObj: func(o *ObjectAttrs) { w.obj = o },
forceEmptyContentType: w.ForceEmptyContentType,
}
if err := w.ctx.Err(); err != nil {
return err // short-circuit
Expand Down

0 comments on commit 0676670

Please sign in to comment.