From 5535851a6bda705c21d69b2228f087f92c837b32 Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Tue, 26 Nov 2024 00:26:06 -0500 Subject: [PATCH] Support real S3's If-Match semantics As of today [0] S3 now supports the If-Match for in-place conditional writes. This commit adjusts the existing support for S3ConditionalPut::Etag mode for compatibility with real S3's particular semantics, which vary slightly from MinIO and R2. Specifically: * Real S3 can occasionally return 409 Conflict when concurrent If-Match requests are in progress. These requests need to be retried. * Real S3 returns 404 Not Found instead of 412 Precondition Failed when issuing an If-Match request against an object that does not exist. Fix #6799. [0]: https://aws.amazon.com/about-aws/whats-new/2024/11/amazon-s3-functionality-conditional-writes/ --- object_store/src/aws/client.rs | 10 ++++++++++ object_store/src/aws/mod.rs | 20 +++++++++++++++++++- object_store/src/client/retry.rs | 14 +++++++++++++- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs index 51c917723ee3..47249685b7bb 100644 --- a/object_store/src/aws/client.rs +++ b/object_store/src/aws/client.rs @@ -290,6 +290,7 @@ pub(crate) struct Request<'a> { payload: Option, use_session_creds: bool, idempotent: bool, + retry_on_conflict: bool, retry_error_body: bool, } @@ -317,6 +318,13 @@ impl<'a> Request<'a> { Self { idempotent, ..self } } + pub(crate) fn retry_on_conflict(self, retry_on_conflict: bool) -> Self { + Self { + retry_on_conflict, + ..self + } + } + pub(crate) fn retry_error_body(self, retry_error_body: bool) -> Self { Self { retry_error_body, @@ -412,6 +420,7 @@ impl<'a> Request<'a> { self.builder .with_aws_sigv4(credential.authorizer(), sha) .retryable(&self.config.retry_config) + .retry_on_conflict(self.retry_on_conflict) .idempotent(self.idempotent) .retry_error_body(self.retry_error_body) .payload(self.payload) @@ -448,6 +457,7 @@ impl S3Client { config: &self.config, use_session_creds: true, idempotent: false, + retry_on_conflict: false, retry_error_body: false, } } diff --git a/object_store/src/aws/mod.rs b/object_store/src/aws/mod.rs index 81511bad7b08..832173da9e81 100644 --- a/object_store/src/aws/mod.rs +++ b/object_store/src/aws/mod.rs @@ -199,7 +199,25 @@ impl ObjectStore for AmazonS3 { match put { S3ConditionalPut::ETagPutIfNotExists => Err(Error::NotImplemented), S3ConditionalPut::ETagMatch => { - request.header(&IF_MATCH, etag.as_str()).do_put().await + match request + .header(&IF_MATCH, etag.as_str()) + // Real S3 will occasionally report 409 Conflict + // if there are concurrent `If-Match` requests + // in flight, so we need to be prepared to retry + // 409 responses. + .retry_on_conflict(true) + .do_put() + .await + { + // Real S3 reports NotFound rather than PreconditionFailed when the + // object doesn't exist. Convert to PreconditionFailed for + // consistency with R2. This also matches what the HTTP spec + // says the behavior should be. + Err(Error::NotFound { path, source }) => { + Err(Error::Precondition { path, source }) + } + r => r, + } } S3ConditionalPut::Dynamo(d) => { d.conditional_op(&self.client, location, Some(&etag), move || { diff --git a/object_store/src/client/retry.rs b/object_store/src/client/retry.rs index 601bffdec158..a8a8e58de4d0 100644 --- a/object_store/src/client/retry.rs +++ b/object_store/src/client/retry.rs @@ -200,6 +200,7 @@ pub(crate) struct RetryableRequest { sensitive: bool, idempotent: Option, + retry_on_conflict: bool, payload: Option, retry_error_body: bool, @@ -217,6 +218,15 @@ impl RetryableRequest { } } + /// Set whether this request should be retried on a 409 Conflict response. + #[cfg(feature = "aws")] + pub(crate) fn retry_on_conflict(self, retry_on_conflict: bool) -> Self { + Self { + retry_on_conflict, + ..self + } + } + /// Set whether this request contains sensitive data /// /// This will avoid printing out the URL in error messages @@ -340,7 +350,8 @@ impl RetryableRequest { let status = r.status(); if retries == max_retries || now.elapsed() > retry_timeout - || !status.is_server_error() + || !(status.is_server_error() + || (self.retry_on_conflict && status == StatusCode::CONFLICT)) { return Err(match status.is_client_error() { true => match r.text().await { @@ -467,6 +478,7 @@ impl RetryExt for reqwest::RequestBuilder { idempotent: None, payload: None, sensitive: false, + retry_on_conflict: false, retry_error_body: false, } }