From 633f5b3214ffb5c10b8b0de5774ec7901fc4fbae Mon Sep 17 00:00:00 2001 From: Aaron Marten <437496+AaronRM@users.noreply.github.com> Date: Wed, 15 Jan 2025 15:24:32 -0800 Subject: [PATCH 01/27] Fix comment about how to only run unit tests --- opentelemetry-otlp/tests/integration_test/src/test_utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opentelemetry-otlp/tests/integration_test/src/test_utils.rs b/opentelemetry-otlp/tests/integration_test/src/test_utils.rs index 50570d888a..9037dec865 100644 --- a/opentelemetry-otlp/tests/integration_test/src/test_utils.rs +++ b/opentelemetry-otlp/tests/integration_test/src/test_utils.rs @@ -13,7 +13,7 @@ //! Only a single test suite can run at once, as each container has statically mapped ports, but //! this works nicely with the way cargo executes the suite. //! -//! To skip integration tests with cargo, you can run `cargo test --mod`, which will run unit tests +//! To skip integration tests with cargo, you can run `cargo test --lib`, which will run unit tests //! only. //! #![cfg(unix)] From b3115f50cd22876978cd817c518ba7417c953b3d Mon Sep 17 00:00:00 2001 From: Aaron Marten <437496+AaronRM@users.noreply.github.com> Date: Wed, 15 Jan 2025 15:43:25 -0800 Subject: [PATCH 02/27] Remove extraneous lines from test_with_gzip_compression unit test --- opentelemetry-otlp/src/exporter/tonic/mod.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/opentelemetry-otlp/src/exporter/tonic/mod.rs b/opentelemetry-otlp/src/exporter/tonic/mod.rs index a71a843e40..6986107602 100644 --- a/opentelemetry-otlp/src/exporter/tonic/mod.rs +++ b/opentelemetry-otlp/src/exporter/tonic/mod.rs @@ -564,9 +564,6 @@ mod tests { #[test] #[cfg(feature = "gzip-tonic")] fn test_with_gzip_compression() { - // metadata should merge with the current one with priority instead of just replacing it - let mut metadata = MetadataMap::new(); - metadata.insert("foo", "bar".parse().unwrap()); let builder = TonicExporterBuilder::default().with_compression(Compression::Gzip); assert_eq!(builder.tonic_config.compression.unwrap(), Compression::Gzip); } From 18dae97fc185cac01cd3dfb376a6e9ee1f1c257b Mon Sep 17 00:00:00 2001 From: Aaron Marten <437496+AaronRM@users.noreply.github.com> Date: Thu, 27 Feb 2025 10:38:16 -0800 Subject: [PATCH 03/27] Add retry_with_exponential_backoff method; Use in tonic logs client --- opentelemetry-otlp/src/exporter/http/mod.rs | 2 +- opentelemetry-otlp/src/exporter/tonic/logs.rs | 81 ++++++++++++------- opentelemetry-otlp/src/lib.rs | 1 + opentelemetry-otlp/src/retry.rs | 47 +++++++++++ opentelemetry-proto/src/transform/logs.rs | 8 +- 5 files changed, 105 insertions(+), 34 deletions(-) create mode 100644 opentelemetry-otlp/src/retry.rs diff --git a/opentelemetry-otlp/src/exporter/http/mod.rs b/opentelemetry-otlp/src/exporter/http/mod.rs index 4dbcb603f4..5cb25ea37e 100644 --- a/opentelemetry-otlp/src/exporter/http/mod.rs +++ b/opentelemetry-otlp/src/exporter/http/mod.rs @@ -388,7 +388,7 @@ impl OtlpHttpClient { logs: LogBatch<'_>, ) -> Result<(Vec, &'static str, Option<&'static str>), String> { use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest; - let resource_logs = group_logs_by_resource_and_scope(logs, &self.resource); + let resource_logs = group_logs_by_resource_and_scope(&logs, &self.resource); let req = ExportLogsServiceRequest { resource_logs }; let (body, content_type) = match self.protocol { diff --git a/opentelemetry-otlp/src/exporter/tonic/logs.rs b/opentelemetry-otlp/src/exporter/tonic/logs.rs index 
9f8b9d8a6d..578a35b232 100644 --- a/opentelemetry-otlp/src/exporter/tonic/logs.rs +++ b/opentelemetry-otlp/src/exporter/tonic/logs.rs @@ -1,3 +1,4 @@ +use std::sync::Arc; use core::fmt; use opentelemetry::otel_debug; use opentelemetry_proto::tonic::collector::logs::v1::{ @@ -13,6 +14,8 @@ use opentelemetry_proto::transform::logs::tonic::group_logs_by_resource_and_scop use super::BoxInterceptor; +use crate::retry::{retry_with_exponential_backoff, RetryPolicy}; + pub(crate) struct TonicLogsClient { inner: Mutex>, #[allow(dead_code)] @@ -58,41 +61,61 @@ impl TonicLogsClient { impl LogExporter for TonicLogsClient { async fn export(&self, batch: LogBatch<'_>) -> OTelSdkResult { - let (mut client, metadata, extensions) = match self.inner.lock().await.as_mut() { - Some(inner) => { - let (m, e, _) = inner - .interceptor - .call(Request::new(())) - .map_err(|e| OTelSdkError::InternalFailure(format!("error: {e:?}")))? - .into_parts(); - (inner.client.clone(), m, e) - } - None => return Err(OTelSdkError::AlreadyShutdown), + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, }; - let resource_logs = group_logs_by_resource_and_scope(batch, &self.resource); + let batch = Arc::new(batch); - otel_debug!(name: "TonicLogsClient.ExportStarted"); + retry_with_exponential_backoff(policy, "TonicLogsClient.Export", { + let batch = Arc::clone(&batch); + let inner = &self.inner; + let resource = &self.resource; + move || { + let batch = Arc::clone(&batch); + Box::pin(async move { + let (mut client, metadata, extensions) = match inner.lock().await.as_mut() { + Some(inner) => { + let (m, e, _) = inner + .interceptor + .call(Request::new(())) + .map_err(|e| OTelSdkError::InternalFailure(format!("error: {e:?}")))? + .into_parts(); + (inner.client.clone(), m, e) + } + None => return Err(OTelSdkError::AlreadyShutdown), + }; - let result = client - .export(Request::from_parts( - metadata, - extensions, - ExportLogsServiceRequest { resource_logs }, - )) - .await; + let resource_logs = group_logs_by_resource_and_scope(&*batch, resource); - match result { - Ok(_) => { - otel_debug!(name: "TonicLogsClient.ExportSucceeded"); - Ok(()) - } - Err(e) => { - let error = format!("export error: {e:?}"); - otel_debug!(name: "TonicLogsClient.ExportFailed", error = &error); - Err(OTelSdkError::InternalFailure(error)) + otel_debug!(name: "TonicsLogsClient.ExportStarted"); + + let result = client + .export(Request::from_parts( + metadata, + extensions, + ExportLogsServiceRequest { resource_logs }, + )) + .await; + + match result { + Ok(_) => { + otel_debug!(name: "TonicLogsClient.ExportSucceeded"); + Ok(()) + } + Err(e) => { + let error = format!("export error: {e:?}"); + otel_debug!(name: "TonicLogsClient.ExportFailed", error = &error); + Err(OTelSdkError::InternalFailure(error)) + } + } + }) } - } + }) + .await } fn shutdown_with_timeout(&self, _timeout: time::Duration) -> OTelSdkResult { diff --git a/opentelemetry-otlp/src/lib.rs b/opentelemetry-otlp/src/lib.rs index 3474b2b590..2ddbec77ca 100644 --- a/opentelemetry-otlp/src/lib.rs +++ b/opentelemetry-otlp/src/lib.rs @@ -365,6 +365,7 @@ mod metric; #[cfg(feature = "trace")] #[cfg(any(feature = "http-proto", feature = "http-json", feature = "grpc-tonic"))] mod span; +mod retry; pub use crate::exporter::Compression; pub use crate::exporter::ExportConfig; diff --git a/opentelemetry-otlp/src/retry.rs b/opentelemetry-otlp/src/retry.rs new file mode 100644 index 0000000000..841ade870d --- /dev/null +++ 
b/opentelemetry-otlp/src/retry.rs @@ -0,0 +1,47 @@ +use std::future::Future; +use std::time::{Duration, SystemTime}; +use opentelemetry::otel_warn; +use tokio::time::sleep; + +pub(crate) struct RetryPolicy { + pub max_retries: usize, + pub initial_delay_ms: u64, + pub max_delay_ms: u64, + pub jitter_ms: u64, +} + +fn generate_jitter(max_jitter: u64) -> u64 { + let now = SystemTime::now(); + let nanos = now.duration_since(SystemTime::UNIX_EPOCH).unwrap().subsec_nanos(); + nanos as u64 % (max_jitter + 1) +} + +pub(crate) async fn retry_with_exponential_backoff( + policy: RetryPolicy, + operation_name: &str, + mut operation: F, +) -> Result +where + F: FnMut() -> Fut, + E: std::fmt::Debug, + Fut: Future>, +{ + let mut attempt = 0; + let mut delay = policy.initial_delay_ms; + + loop { + match operation().await { + Ok(result) => return Ok(result), + Err(err) if attempt < policy.max_retries => { + attempt += 1; + // Log the error and retry after a delay with jitter + otel_warn!(name: "OtlpRetry", message = format!("Retrying operation {:?} due to error: {:?}", operation_name, err)); + let jitter = generate_jitter(policy.jitter_ms); + let delay_with_jitter = std::cmp::min(delay + jitter, policy.max_delay_ms); + sleep(Duration::from_millis(delay_with_jitter)).await; + delay = std::cmp::min(delay * 2, policy.max_delay_ms); + } + Err(err) => return Err(err), + } + } +} \ No newline at end of file diff --git a/opentelemetry-proto/src/transform/logs.rs b/opentelemetry-proto/src/transform/logs.rs index 65d0598216..7d68decf82 100644 --- a/opentelemetry-proto/src/transform/logs.rs +++ b/opentelemetry-proto/src/transform/logs.rs @@ -165,8 +165,8 @@ pub mod tonic { } } - pub fn group_logs_by_resource_and_scope( - logs: LogBatch<'_>, + pub fn group_logs_by_resource_and_scope<'a>( + logs: &'a LogBatch<'a>, resource: &ResourceAttributesWithSchema, ) -> Vec { // Group logs by target or instrumentation name @@ -271,7 +271,7 @@ mod tests { let resource: ResourceAttributesWithSchema = (&resource).into(); // Convert Resource to ResourceAttributesWithSchema let grouped_logs = - crate::transform::logs::tonic::group_logs_by_resource_and_scope(log_batch, &resource); + crate::transform::logs::tonic::group_logs_by_resource_and_scope(&log_batch, &resource); assert_eq!(grouped_logs.len(), 1); let resource_logs = &grouped_logs[0]; @@ -291,7 +291,7 @@ mod tests { let log_batch = LogBatch::new(&logs); let resource: ResourceAttributesWithSchema = (&resource).into(); // Convert Resource to ResourceAttributesWithSchema let grouped_logs = - crate::transform::logs::tonic::group_logs_by_resource_and_scope(log_batch, &resource); + crate::transform::logs::tonic::group_logs_by_resource_and_scope(&log_batch, &resource); assert_eq!(grouped_logs.len(), 1); let resource_logs = &grouped_logs[0]; From d5743c6bea2b6b5f98c240a361d7f9f296e239fa Mon Sep 17 00:00:00 2001 From: Aaron Marten <437496+AaronRM@users.noreply.github.com> Date: Thu, 27 Feb 2025 11:07:20 -0800 Subject: [PATCH 04/27] Add tests and comments to retry.rs --- opentelemetry-otlp/src/retry.rs | 106 +++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 3 deletions(-) diff --git a/opentelemetry-otlp/src/retry.rs b/opentelemetry-otlp/src/retry.rs index 841ade870d..de75f51411 100644 --- a/opentelemetry-otlp/src/retry.rs +++ b/opentelemetry-otlp/src/retry.rs @@ -10,12 +10,14 @@ pub(crate) struct RetryPolicy { pub jitter_ms: u64, } +// Generates a random jitter value up to max_jitter fn generate_jitter(max_jitter: u64) -> u64 { let now = SystemTime::now(); let nanos = 
now.duration_since(SystemTime::UNIX_EPOCH).unwrap().subsec_nanos(); nanos as u64 % (max_jitter + 1) } +// Retries the given operation with exponential backoff and jitter pub(crate) async fn retry_with_exponential_backoff( policy: RetryPolicy, operation_name: &str, @@ -31,7 +33,7 @@ where loop { match operation().await { - Ok(result) => return Ok(result), + Ok(result) => return Ok(result), // Return the result if the operation succeeds Err(err) if attempt < policy.max_retries => { attempt += 1; // Log the error and retry after a delay with jitter @@ -39,9 +41,107 @@ where let jitter = generate_jitter(policy.jitter_ms); let delay_with_jitter = std::cmp::min(delay + jitter, policy.max_delay_ms); sleep(Duration::from_millis(delay_with_jitter)).await; - delay = std::cmp::min(delay * 2, policy.max_delay_ms); + delay = std::cmp::min(delay * 2, policy.max_delay_ms); // Exponential backoff } - Err(err) => return Err(err), + Err(err) => return Err(err), // Return the error if max retries are reached } } +} + +#[cfg(test)] +mod tests { + use super::*; + use tokio::time::timeout; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::time::Duration; + + // Test to ensure generate_jitter returns a value within the expected range + #[tokio::test] + async fn test_generate_jitter() { + let max_jitter = 100; + let jitter = generate_jitter(max_jitter); + assert!(jitter <= max_jitter); + } + + // Test to ensure retry_with_exponential_backoff succeeds on the first attempt + #[tokio::test] + async fn test_retry_with_exponential_backoff_success() { + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }; + + let result = retry_with_exponential_backoff(policy, "test_operation", || { + Box::pin(async { Ok::<_, ()>("success") }) + }).await; + + assert_eq!(result, Ok("success")); + } + + // Test to ensure retry_with_exponential_backoff retries the operation and eventually succeeds + #[tokio::test] + async fn test_retry_with_exponential_backoff_retries() { + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }; + + let attempts = AtomicUsize::new(0); + + let result = retry_with_exponential_backoff(policy, "test_operation", || { + let attempt = attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async move { + if attempt < 2 { + Err::<&str, &str>("error") // Fail the first two attempts + } else { + Ok::<&str, &str>("success") // Succeed on the third attempt + } + }) + }).await; + + assert_eq!(result, Ok("success")); + assert_eq!(attempts.load(Ordering::SeqCst), 3); // Ensure there were 3 attempts + } + + // Test to ensure retry_with_exponential_backoff fails after max retries + #[tokio::test] + async fn test_retry_with_exponential_backoff_failure() { + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }; + + let attempts = AtomicUsize::new(0); + + let result = retry_with_exponential_backoff(policy, "test_operation", || { + attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async { Err::<(), _>("error") }) // Always fail + }).await; + + assert_eq!(result, Err("error")); + assert_eq!(attempts.load(Ordering::SeqCst), 4); // Ensure there were 4 attempts (initial + 3 retries) + } + + // Test to ensure retry_with_exponential_backoff respects the timeout + #[tokio::test] + async fn test_retry_with_exponential_backoff_timeout() { + let policy = RetryPolicy { + max_retries: 12, // Increase the number of retries + initial_delay_ms: 100, + 
max_delay_ms: 1600, + jitter_ms: 100, + }; + + let result = timeout(Duration::from_secs(1), retry_with_exponential_backoff(policy, "test_operation", || { + Box::pin(async { Err::<(), _>("error") }) // Always fail + })).await; + + assert!(result.is_err()); // Ensure the operation times out + } } \ No newline at end of file From 20c137571a9e2e20cada18d8afe5cdc7e0776bd6 Mon Sep 17 00:00:00 2001 From: Aaron Marten <437496+AaronRM@users.noreply.github.com> Date: Thu, 13 Mar 2025 06:25:46 -0700 Subject: [PATCH 05/27] Move retry to opentelemetry-sdk (WIP) --- opentelemetry-otlp/Cargo.toml | 2 +- opentelemetry-otlp/src/exporter/tonic/logs.rs | 4 +- opentelemetry-otlp/src/lib.rs | 1 - opentelemetry-sdk/src/lib.rs | 3 + .../src/retry.rs | 98 +++++++++++++++++-- 5 files changed, 95 insertions(+), 13 deletions(-) rename {opentelemetry-otlp => opentelemetry-sdk}/src/retry.rs (56%) diff --git a/opentelemetry-otlp/Cargo.toml b/opentelemetry-otlp/Cargo.toml index 9acf73a1fb..b345f6955b 100644 --- a/opentelemetry-otlp/Cargo.toml +++ b/opentelemetry-otlp/Cargo.toml @@ -69,7 +69,7 @@ serialize = ["serde", "serde_json"] default = ["http-proto", "reqwest-blocking-client", "trace", "metrics", "logs", "internal-logs"] # grpc using tonic -grpc-tonic = ["tonic", "prost", "http", "tokio", "opentelemetry-proto/gen-tonic"] +grpc-tonic = ["tonic", "prost", "http", "tokio", "opentelemetry-proto/gen-tonic", "opentelemetry_sdk/rt-tokio"] gzip-tonic = ["tonic/gzip"] zstd-tonic = ["tonic/zstd"] diff --git a/opentelemetry-otlp/src/exporter/tonic/logs.rs b/opentelemetry-otlp/src/exporter/tonic/logs.rs index 578a35b232..a60857eaf1 100644 --- a/opentelemetry-otlp/src/exporter/tonic/logs.rs +++ b/opentelemetry-otlp/src/exporter/tonic/logs.rs @@ -14,7 +14,7 @@ use opentelemetry_proto::transform::logs::tonic::group_logs_by_resource_and_scop use super::BoxInterceptor; -use crate::retry::{retry_with_exponential_backoff, RetryPolicy}; +use opentelemetry_sdk::retry::{retry_with_exponential_backoff, RetryPolicy}; pub(crate) struct TonicLogsClient { inner: Mutex>, @@ -70,7 +70,7 @@ impl LogExporter for TonicLogsClient { let batch = Arc::new(batch); - retry_with_exponential_backoff(policy, "TonicLogsClient.Export", { + retry_with_exponential_backoff::<_, _, _, _, tokio::time::Sleep>(policy, "TonicLogsClient.Export", { let batch = Arc::clone(&batch); let inner = &self.inner; let resource = &self.resource; diff --git a/opentelemetry-otlp/src/lib.rs b/opentelemetry-otlp/src/lib.rs index 2ddbec77ca..3474b2b590 100644 --- a/opentelemetry-otlp/src/lib.rs +++ b/opentelemetry-otlp/src/lib.rs @@ -365,7 +365,6 @@ mod metric; #[cfg(feature = "trace")] #[cfg(any(feature = "http-proto", feature = "http-json", feature = "grpc-tonic"))] mod span; -mod retry; pub use crate::exporter::Compression; pub use crate::exporter::ExportConfig; diff --git a/opentelemetry-sdk/src/lib.rs b/opentelemetry-sdk/src/lib.rs index 8f8e5a8653..1143e333f8 100644 --- a/opentelemetry-sdk/src/lib.rs +++ b/opentelemetry-sdk/src/lib.rs @@ -167,3 +167,6 @@ impl From> for InMemoryExporterError { InMemoryExporterError::InternalFailure(format!("Mutex poison error: {err}")) } } + +/// Retry logic for exporting telemetry data. +pub mod retry; diff --git a/opentelemetry-otlp/src/retry.rs b/opentelemetry-sdk/src/retry.rs similarity index 56% rename from opentelemetry-otlp/src/retry.rs rename to opentelemetry-sdk/src/retry.rs index de75f51411..c65247c6ec 100644 --- a/opentelemetry-otlp/src/retry.rs +++ b/opentelemetry-sdk/src/retry.rs @@ -1,12 +1,31 @@ +//! 
This module provides functionality for retrying operations with exponential backoff and jitter. +//! +//! The `RetryPolicy` struct defines the configuration for the retry behavior, including the maximum +//! number of retries, initial delay, maximum delay, and jitter. +//! +//! The `Sleep` trait abstracts the sleep functionality, allowing different implementations for +//! various async runtimes such as Tokio and async-std, as well as a synchronous implementation. +//! +//! The `retry_with_exponential_backoff` function retries the given operation according to the +//! specified retry policy, using exponential backoff and jitter to determine the delay between +//! retries. The function logs errors and retries the operation until it succeeds or the maximum +//! number of retries is reached. + use std::future::Future; +use std::pin::Pin; use std::time::{Duration, SystemTime}; use opentelemetry::otel_warn; -use tokio::time::sleep; -pub(crate) struct RetryPolicy { +/// Configuration for retry policy. +#[derive(Debug)] +pub struct RetryPolicy { + /// Maximum number of retry attempts. pub max_retries: usize, + /// Initial delay in milliseconds before the first retry. pub initial_delay_ms: u64, + /// Maximum delay in milliseconds between retries. pub max_delay_ms: u64, + /// Maximum jitter in milliseconds to add to the delay. pub jitter_ms: u64, } @@ -17,8 +36,68 @@ fn generate_jitter(max_jitter: u64) -> u64 { nanos as u64 % (max_jitter + 1) } -// Retries the given operation with exponential backoff and jitter -pub(crate) async fn retry_with_exponential_backoff( +/// Trait to abstract the sleep functionality. +pub trait Sleep { + /// The future returned by the sleep function. + type SleepFuture: Future; + + /// Sleeps for the specified duration. + fn sleep(duration: Duration) -> Self::SleepFuture; +} + +/// Implementation of the Sleep trait for tokio::time::Sleep +#[cfg(feature = "rt-tokio")] +impl Sleep for tokio::time::Sleep { + type SleepFuture = tokio::time::Sleep; + + fn sleep(duration: Duration) -> Self::SleepFuture { + tokio::time::sleep(duration) + } +} + +#[cfg(feature = "rt-async-std")] +/// There is no direct equivalent to `tokio::time::Sleep` in `async-std`. +/// Instead, we create a new struct `AsyncStdSleep` and implement the `Sleep` +/// trait for it, boxing the future returned by `async_std::task::sleep` to fit +/// the trait's associated type requirements. +#[derive(Debug)] +pub struct AsyncStdSleep; + +/// Implementation of the Sleep trait for async-std +#[cfg(feature = "rt-async-std")] +impl Sleep for AsyncStdSleep { + type SleepFuture = Pin + Send>>; + + fn sleep(duration: Duration) -> Self::SleepFuture { + Box::pin(async_std::task::sleep(duration)) + } +} + +/// Implement the Sleep trait for synchronous sleep +#[derive(Debug)] +pub struct StdSleep; + +impl Sleep for StdSleep { + type SleepFuture = std::future::Ready<()>; + + fn sleep(duration: Duration) -> Self::SleepFuture { + std::thread::sleep(duration); + std::future::ready(()) + } +} + +/// Retries the given operation with exponential backoff and jitter. +/// +/// # Arguments +/// +/// * `policy` - The retry policy configuration. +/// * `operation_name` - The name of the operation being retried. +/// * `operation` - The operation to be retried. +/// +/// # Returns +/// +/// A `Result` containing the operation's result or an error if the maximum retries are reached. 
+pub async fn retry_with_exponential_backoff( policy: RetryPolicy, operation_name: &str, mut operation: F, @@ -27,6 +106,7 @@ where F: FnMut() -> Fut, E: std::fmt::Debug, Fut: Future>, + S: Sleep, { let mut attempt = 0; let mut delay = policy.initial_delay_ms; @@ -40,7 +120,7 @@ where otel_warn!(name: "OtlpRetry", message = format!("Retrying operation {:?} due to error: {:?}", operation_name, err)); let jitter = generate_jitter(policy.jitter_ms); let delay_with_jitter = std::cmp::min(delay + jitter, policy.max_delay_ms); - sleep(Duration::from_millis(delay_with_jitter)).await; + S::sleep(Duration::from_millis(delay_with_jitter)).await; delay = std::cmp::min(delay * 2, policy.max_delay_ms); // Exponential backoff } Err(err) => return Err(err), // Return the error if max retries are reached @@ -73,7 +153,7 @@ mod tests { jitter_ms: 100, }; - let result = retry_with_exponential_backoff(policy, "test_operation", || { + let result = retry_with_exponential_backoff::<_, _, _, _, tokio::time::Sleep>(policy, "test_operation", || { Box::pin(async { Ok::<_, ()>("success") }) }).await; @@ -92,7 +172,7 @@ mod tests { let attempts = AtomicUsize::new(0); - let result = retry_with_exponential_backoff(policy, "test_operation", || { + let result = retry_with_exponential_backoff::<_, _, _, _, tokio::time::Sleep>(policy, "test_operation", || { let attempt = attempts.fetch_add(1, Ordering::SeqCst); Box::pin(async move { if attempt < 2 { @@ -119,7 +199,7 @@ mod tests { let attempts = AtomicUsize::new(0); - let result = retry_with_exponential_backoff(policy, "test_operation", || { + let result = retry_with_exponential_backoff::<_, _, _, _, tokio::time::Sleep>(policy, "test_operation", || { attempts.fetch_add(1, Ordering::SeqCst); Box::pin(async { Err::<(), _>("error") }) // Always fail }).await; @@ -138,7 +218,7 @@ mod tests { jitter_ms: 100, }; - let result = timeout(Duration::from_secs(1), retry_with_exponential_backoff(policy, "test_operation", || { + let result = timeout(Duration::from_secs(1), retry_with_exponential_backoff::<_, _, _, _, tokio::time::Sleep>(policy, "test_operation", || { Box::pin(async { Err::<(), _>("error") }) // Always fail })).await; From b3e17ef1791f2449a9ff66e4fdf9e126f1ecfb78 Mon Sep 17 00:00:00 2001 From: Aaron Marten <437496+AaronRM@users.noreply.github.com> Date: Thu, 27 Mar 2025 16:16:09 -0700 Subject: [PATCH 06/27] Fix build warning --- opentelemetry-otlp/src/exporter/tonic/logs.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/opentelemetry-otlp/src/exporter/tonic/logs.rs b/opentelemetry-otlp/src/exporter/tonic/logs.rs index a8427dffaf..a6c784437d 100644 --- a/opentelemetry-otlp/src/exporter/tonic/logs.rs +++ b/opentelemetry-otlp/src/exporter/tonic/logs.rs @@ -6,7 +6,6 @@ use opentelemetry_proto::tonic::collector::logs::v1::{ }; use opentelemetry_sdk::error::{OTelSdkError, OTelSdkResult}; use opentelemetry_sdk::logs::{LogBatch, LogExporter}; -use std::time; use tokio::sync::Mutex; use tonic::{codegen::CompressionEncoding, service::Interceptor, transport::Channel, Request}; @@ -118,11 +117,9 @@ impl LogExporter for TonicLogsClient { .await } - fn shutdown(&mut self) -> OTelSdkResult { - match self.inner.take() { - Some(_) => Ok(()), // Successfully took `inner`, indicating a successful shutdown. - None => Err(OTelSdkError::AlreadyShutdown), // `inner` was already `None`, meaning it's already shut down. - } + fn shutdown(&self) -> OTelSdkResult { + // TODO: We broke this rebasing. fix it! 
+ Ok(()) } fn set_resource(&mut self, resource: &opentelemetry_sdk::Resource) { From edcc00cd488cabfa472b2464007ef30c4243a8f0 Mon Sep 17 00:00:00 2001 From: Aaron Marten <437496+AaronRM@users.noreply.github.com> Date: Thu, 27 Mar 2025 16:16:32 -0700 Subject: [PATCH 07/27] Scope retry to just logs+tonic --- opentelemetry-otlp/src/exporter/tonic/logs.rs | 10 +- opentelemetry-otlp/src/exporter/tonic/mod.rs | 5 + .../src/exporter/tonic/retry.rs | 229 ++++++++++++++++++ 3 files changed, 242 insertions(+), 2 deletions(-) create mode 100644 opentelemetry-otlp/src/exporter/tonic/retry.rs diff --git a/opentelemetry-otlp/src/exporter/tonic/logs.rs b/opentelemetry-otlp/src/exporter/tonic/logs.rs index a6c784437d..f67b7b4066 100644 --- a/opentelemetry-otlp/src/exporter/tonic/logs.rs +++ b/opentelemetry-otlp/src/exporter/tonic/logs.rs @@ -90,7 +90,7 @@ impl LogExporter for TonicLogsClient { let resource_logs = group_logs_by_resource_and_scope(&*batch, resource); - otel_debug!(name: "TonicsLogsClient.ExportStarted"); + otel_debug!(name: "TonicLogsClient.ExportStarted"); let result = client .export(Request::from_parts( @@ -118,7 +118,13 @@ impl LogExporter for TonicLogsClient { } fn shutdown(&self) -> OTelSdkResult { - // TODO: We broke this rebasing. fix it! + // TODO: Implement actual shutdown + // Due to the use of tokio::sync::Mutex to guard + // the inner client, we need to await the call to lock the mutex + // and that requires async runtime. + // It is possible to fix this by using + // a dedicated thread just to handle shutdown. + // But for now, we just return Ok. Ok(()) } diff --git a/opentelemetry-otlp/src/exporter/tonic/mod.rs b/opentelemetry-otlp/src/exporter/tonic/mod.rs index 6986107602..08ff858c36 100644 --- a/opentelemetry-otlp/src/exporter/tonic/mod.rs +++ b/opentelemetry-otlp/src/exporter/tonic/mod.rs @@ -25,6 +25,11 @@ pub(crate) mod metrics; #[cfg(feature = "trace")] pub(crate) mod trace; +// For now, we are not exposing the retry policy. Only work with grpc-tonic since retry takes a hard dependency on tokio +// while we sort out an abstraction for the async runtime which can be used by all exporters. +#[cfg(feature = "grpc-tonic")] +mod retry; + /// Configuration for [tonic] /// /// [tonic]: https://github.com/hyperium/tonic diff --git a/opentelemetry-otlp/src/exporter/tonic/retry.rs b/opentelemetry-otlp/src/exporter/tonic/retry.rs new file mode 100644 index 0000000000..d9a283a463 --- /dev/null +++ b/opentelemetry-otlp/src/exporter/tonic/retry.rs @@ -0,0 +1,229 @@ +//! This module provides functionality for retrying operations with exponential backoff and jitter. +//! +//! The `RetryPolicy` struct defines the configuration for the retry behavior, including the maximum +//! number of retries, initial delay, maximum delay, and jitter. +//! +//! The `Sleep` trait abstracts the sleep functionality, allowing different implementations for +//! various async runtimes such as Tokio and async-std, as well as a synchronous implementation. +//! +//! The `retry_with_exponential_backoff` function retries the given operation according to the +//! specified retry policy, using exponential backoff and jitter to determine the delay between +//! retries. The function logs errors and retries the operation until it succeeds or the maximum +//! number of retries is reached. + +use std::future::Future; +use std::time::{Duration, SystemTime}; +use opentelemetry::otel_warn; + +/// Configuration for retry policy. +#[derive(Debug)] +pub(super) struct RetryPolicy { + /// Maximum number of retry attempts. 
+ pub max_retries: usize, + /// Initial delay in milliseconds before the first retry. + pub initial_delay_ms: u64, + /// Maximum delay in milliseconds between retries. + pub max_delay_ms: u64, + /// Maximum jitter in milliseconds to add to the delay. + pub jitter_ms: u64, +} + +// Generates a random jitter value up to max_jitter +fn generate_jitter(max_jitter: u64) -> u64 { + let now = SystemTime::now(); + let nanos = now.duration_since(SystemTime::UNIX_EPOCH).unwrap().subsec_nanos(); + nanos as u64 % (max_jitter + 1) +} + +// /// Trait to abstract the sleep functionality. +// pub trait Sleep { +// /// The future returned by the sleep function. +// type SleepFuture: Future; + +// /// Sleeps for the specified duration. +// fn sleep(duration: Duration) -> Self::SleepFuture; +// } + +// /// Implementation of the Sleep trait for tokio::time::Sleep +// #[cfg(feature = "rt-tokio")] +// impl Sleep for tokio::time::Sleep { +// type SleepFuture = tokio::time::Sleep; + +// fn sleep(duration: Duration) -> Self::SleepFuture { +// } +// } + +// #[cfg(feature = "rt-async-std")] +// /// There is no direct equivalent to `tokio::time::Sleep` in `async-std`. +// /// Instead, we create a new struct `AsyncStdSleep` and implement the `Sleep` +// /// trait for it, boxing the future returned by `async_std::task::sleep` to fit +// /// the trait's associated type requirements. +// #[derive(Debug)] +// pub struct AsyncStdSleep; + +// /// Implementation of the Sleep trait for async-std +// #[cfg(feature = "rt-async-std")] +// impl Sleep for AsyncStdSleep { +// type SleepFuture = Pin + Send>>; + +// fn sleep(duration: Duration) -> Self::SleepFuture { +// Box::pin(async_std::task::sleep(duration)) +// } +// } + +// /// Implement the Sleep trait for synchronous sleep +// #[derive(Debug)] +// pub struct StdSleep; + +// impl Sleep for StdSleep { +// type SleepFuture = std::future::Ready<()>; + +// fn sleep(duration: Duration) -> Self::SleepFuture { +// std::thread::sleep(duration); +// std::future::ready(()) +// } +// } + +/// Retries the given operation with exponential backoff and jitter. +/// +/// # Arguments +/// +/// * `policy` - The retry policy configuration. +/// * `operation_name` - The name of the operation being retried. +/// * `operation` - The operation to be retried. +/// +/// # Returns +/// +/// A `Result` containing the operation's result or an error if the maximum retries are reached. +pub(super) async fn retry_with_exponential_backoff( + policy: RetryPolicy, + operation_name: &str, + mut operation: F, +) -> Result +where + F: FnMut() -> Fut, + E: std::fmt::Debug, + Fut: Future>, +{ + let mut attempt = 0; + let mut delay = policy.initial_delay_ms; + + loop { + match operation().await { + Ok(result) => return Ok(result), // Return the result if the operation succeeds + Err(err) if attempt < policy.max_retries => { + attempt += 1; + // Log the error and retry after a delay with jitter + otel_warn!(name: "OtlpRetry", message = format!("Retrying operation {:?} due to error: {:?}", operation_name, err)); + let jitter = generate_jitter(policy.jitter_ms); + let delay_with_jitter = std::cmp::min(delay + jitter, policy.max_delay_ms); + + // Retry currently only supports tokio::time::sleep (for use with gRPC/tonic). This + // should be replaced with a more generic sleep function that works with async-std + // and a synchronous runtime in the future. 
+ tokio::time::sleep(Duration::from_millis(delay_with_jitter)).await; + + delay = std::cmp::min(delay * 2, policy.max_delay_ms); // Exponential backoff + } + Err(err) => return Err(err), // Return the error if max retries are reached + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tokio::time::timeout; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::time::Duration; + + // Test to ensure generate_jitter returns a value within the expected range + #[tokio::test] + async fn test_generate_jitter() { + let max_jitter = 100; + let jitter = generate_jitter(max_jitter); + assert!(jitter <= max_jitter); + } + + // Test to ensure retry_with_exponential_backoff succeeds on the first attempt + #[tokio::test] + async fn test_retry_with_exponential_backoff_success() { + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }; + + let result = retry_with_exponential_backoff(policy, "test_operation", || { + Box::pin(async { Ok::<_, ()>("success") }) + }).await; + + assert_eq!(result, Ok("success")); + } + + // Test to ensure retry_with_exponential_backoff retries the operation and eventually succeeds + #[tokio::test] + async fn test_retry_with_exponential_backoff_retries() { + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }; + + let attempts = AtomicUsize::new(0); + + let result = retry_with_exponential_backoff(policy, "test_operation", || { + let attempt = attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async move { + if attempt < 2 { + Err::<&str, &str>("error") // Fail the first two attempts + } else { + Ok::<&str, &str>("success") // Succeed on the third attempt + } + }) + }).await; + + assert_eq!(result, Ok("success")); + assert_eq!(attempts.load(Ordering::SeqCst), 3); // Ensure there were 3 attempts + } + + // Test to ensure retry_with_exponential_backoff fails after max retries + #[tokio::test] + async fn test_retry_with_exponential_backoff_failure() { + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }; + + let attempts = AtomicUsize::new(0); + + let result = retry_with_exponential_backoff(policy, "test_operation", || { + attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async { Err::<(), _>("error") }) // Always fail + }).await; + + assert_eq!(result, Err("error")); + assert_eq!(attempts.load(Ordering::SeqCst), 4); // Ensure there were 4 attempts (initial + 3 retries) + } + + // Test to ensure retry_with_exponential_backoff respects the timeout + #[tokio::test] + async fn test_retry_with_exponential_backoff_timeout() { + let policy = RetryPolicy { + max_retries: 12, // Increase the number of retries + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }; + + let result = timeout(Duration::from_secs(1), retry_with_exponential_backoff(policy, "test_operation", || { + Box::pin(async { Err::<(), _>("error") }) // Always fail + })).await; + + assert!(result.is_err()); // Ensure the operation times out + } +} \ No newline at end of file From ee75730141d1174c5f8bfdef5885af98838c7567 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Mon, 11 Aug 2025 17:33:34 +0200 Subject: [PATCH 08/27] clean up after rebase --- opentelemetry-otlp/src/exporter/tonic/logs.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opentelemetry-otlp/src/exporter/tonic/logs.rs b/opentelemetry-otlp/src/exporter/tonic/logs.rs index f67b7b4066..801e37bd99 100644 --- 
a/opentelemetry-otlp/src/exporter/tonic/logs.rs +++ b/opentelemetry-otlp/src/exporter/tonic/logs.rs @@ -6,6 +6,7 @@ use opentelemetry_proto::tonic::collector::logs::v1::{ }; use opentelemetry_sdk::error::{OTelSdkError, OTelSdkResult}; use opentelemetry_sdk::logs::{LogBatch, LogExporter}; +use std::time; use tokio::sync::Mutex; use tonic::{codegen::CompressionEncoding, service::Interceptor, transport::Channel, Request}; @@ -117,7 +118,7 @@ impl LogExporter for TonicLogsClient { .await } - fn shutdown(&self) -> OTelSdkResult { + fn shutdown_with_timeout(&self, _timeout: time::Duration) -> OTelSdkResult { // TODO: Implement actual shutdown // Due to the use of tokio::sync::Mutex to guard // the inner client, we need to await the call to lock the mutex From bcdc17974df81500c384174115288f2c18fc48c8 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Tue, 12 Aug 2025 09:15:43 +0200 Subject: [PATCH 09/27] migrate to the async runtime abstraction --- opentelemetry-otlp/Cargo.toml | 2 +- opentelemetry-otlp/src/exporter/tonic/logs.rs | 9 +- opentelemetry-otlp/src/exporter/tonic/mod.rs | 5 - .../src/exporter/tonic/retry.rs | 229 ------------------ opentelemetry-sdk/src/retry.rs | 146 +++++------ 5 files changed, 82 insertions(+), 309 deletions(-) delete mode 100644 opentelemetry-otlp/src/exporter/tonic/retry.rs diff --git a/opentelemetry-otlp/Cargo.toml b/opentelemetry-otlp/Cargo.toml index b345f6955b..3d28ea9de2 100644 --- a/opentelemetry-otlp/Cargo.toml +++ b/opentelemetry-otlp/Cargo.toml @@ -69,7 +69,7 @@ serialize = ["serde", "serde_json"] default = ["http-proto", "reqwest-blocking-client", "trace", "metrics", "logs", "internal-logs"] # grpc using tonic -grpc-tonic = ["tonic", "prost", "http", "tokio", "opentelemetry-proto/gen-tonic", "opentelemetry_sdk/rt-tokio"] +grpc-tonic = ["tonic", "prost", "http", "tokio", "opentelemetry-proto/gen-tonic", "opentelemetry_sdk/rt-tokio", "opentelemetry_sdk/experimental_async_runtime"] gzip-tonic = ["tonic/gzip"] zstd-tonic = ["tonic/zstd"] diff --git a/opentelemetry-otlp/src/exporter/tonic/logs.rs b/opentelemetry-otlp/src/exporter/tonic/logs.rs index 801e37bd99..62d58548f0 100644 --- a/opentelemetry-otlp/src/exporter/tonic/logs.rs +++ b/opentelemetry-otlp/src/exporter/tonic/logs.rs @@ -1,4 +1,3 @@ -use std::sync::Arc; use core::fmt; use opentelemetry::otel_debug; use opentelemetry_proto::tonic::collector::logs::v1::{ @@ -6,6 +5,7 @@ use opentelemetry_proto::tonic::collector::logs::v1::{ }; use opentelemetry_sdk::error::{OTelSdkError, OTelSdkResult}; use opentelemetry_sdk::logs::{LogBatch, LogExporter}; +use std::sync::Arc; use std::time; use tokio::sync::Mutex; use tonic::{codegen::CompressionEncoding, service::Interceptor, transport::Channel, Request}; @@ -15,6 +15,7 @@ use opentelemetry_proto::transform::logs::tonic::group_logs_by_resource_and_scop use super::BoxInterceptor; use opentelemetry_sdk::retry::{retry_with_exponential_backoff, RetryPolicy}; +use opentelemetry_sdk::runtime::Tokio; pub(crate) struct TonicLogsClient { inner: Mutex>, @@ -70,7 +71,7 @@ impl LogExporter for TonicLogsClient { let batch = Arc::new(batch); - retry_with_exponential_backoff::<_, _, _, _, tokio::time::Sleep>(policy, "TonicLogsClient.Export", { + retry_with_exponential_backoff(Tokio, policy, "TonicLogsClient.Export", { let batch = Arc::clone(&batch); let inner = &self.inner; let resource = &self.resource; @@ -82,7 +83,9 @@ impl LogExporter for TonicLogsClient { let (m, e, _) = inner .interceptor .call(Request::new(())) - .map_err(|e| 
OTelSdkError::InternalFailure(format!("error: {e:?}")))? + .map_err(|e| { + OTelSdkError::InternalFailure(format!("error: {e:?}")) + })? .into_parts(); (inner.client.clone(), m, e) } diff --git a/opentelemetry-otlp/src/exporter/tonic/mod.rs b/opentelemetry-otlp/src/exporter/tonic/mod.rs index 08ff858c36..6986107602 100644 --- a/opentelemetry-otlp/src/exporter/tonic/mod.rs +++ b/opentelemetry-otlp/src/exporter/tonic/mod.rs @@ -25,11 +25,6 @@ pub(crate) mod metrics; #[cfg(feature = "trace")] pub(crate) mod trace; -// For now, we are not exposing the retry policy. Only work with grpc-tonic since retry takes a hard dependency on tokio -// while we sort out an abstraction for the async runtime which can be used by all exporters. -#[cfg(feature = "grpc-tonic")] -mod retry; - /// Configuration for [tonic] /// /// [tonic]: https://github.com/hyperium/tonic diff --git a/opentelemetry-otlp/src/exporter/tonic/retry.rs b/opentelemetry-otlp/src/exporter/tonic/retry.rs deleted file mode 100644 index d9a283a463..0000000000 --- a/opentelemetry-otlp/src/exporter/tonic/retry.rs +++ /dev/null @@ -1,229 +0,0 @@ -//! This module provides functionality for retrying operations with exponential backoff and jitter. -//! -//! The `RetryPolicy` struct defines the configuration for the retry behavior, including the maximum -//! number of retries, initial delay, maximum delay, and jitter. -//! -//! The `Sleep` trait abstracts the sleep functionality, allowing different implementations for -//! various async runtimes such as Tokio and async-std, as well as a synchronous implementation. -//! -//! The `retry_with_exponential_backoff` function retries the given operation according to the -//! specified retry policy, using exponential backoff and jitter to determine the delay between -//! retries. The function logs errors and retries the operation until it succeeds or the maximum -//! number of retries is reached. - -use std::future::Future; -use std::time::{Duration, SystemTime}; -use opentelemetry::otel_warn; - -/// Configuration for retry policy. -#[derive(Debug)] -pub(super) struct RetryPolicy { - /// Maximum number of retry attempts. - pub max_retries: usize, - /// Initial delay in milliseconds before the first retry. - pub initial_delay_ms: u64, - /// Maximum delay in milliseconds between retries. - pub max_delay_ms: u64, - /// Maximum jitter in milliseconds to add to the delay. - pub jitter_ms: u64, -} - -// Generates a random jitter value up to max_jitter -fn generate_jitter(max_jitter: u64) -> u64 { - let now = SystemTime::now(); - let nanos = now.duration_since(SystemTime::UNIX_EPOCH).unwrap().subsec_nanos(); - nanos as u64 % (max_jitter + 1) -} - -// /// Trait to abstract the sleep functionality. -// pub trait Sleep { -// /// The future returned by the sleep function. -// type SleepFuture: Future; - -// /// Sleeps for the specified duration. -// fn sleep(duration: Duration) -> Self::SleepFuture; -// } - -// /// Implementation of the Sleep trait for tokio::time::Sleep -// #[cfg(feature = "rt-tokio")] -// impl Sleep for tokio::time::Sleep { -// type SleepFuture = tokio::time::Sleep; - -// fn sleep(duration: Duration) -> Self::SleepFuture { -// } -// } - -// #[cfg(feature = "rt-async-std")] -// /// There is no direct equivalent to `tokio::time::Sleep` in `async-std`. -// /// Instead, we create a new struct `AsyncStdSleep` and implement the `Sleep` -// /// trait for it, boxing the future returned by `async_std::task::sleep` to fit -// /// the trait's associated type requirements. 
-// #[derive(Debug)] -// pub struct AsyncStdSleep; - -// /// Implementation of the Sleep trait for async-std -// #[cfg(feature = "rt-async-std")] -// impl Sleep for AsyncStdSleep { -// type SleepFuture = Pin + Send>>; - -// fn sleep(duration: Duration) -> Self::SleepFuture { -// Box::pin(async_std::task::sleep(duration)) -// } -// } - -// /// Implement the Sleep trait for synchronous sleep -// #[derive(Debug)] -// pub struct StdSleep; - -// impl Sleep for StdSleep { -// type SleepFuture = std::future::Ready<()>; - -// fn sleep(duration: Duration) -> Self::SleepFuture { -// std::thread::sleep(duration); -// std::future::ready(()) -// } -// } - -/// Retries the given operation with exponential backoff and jitter. -/// -/// # Arguments -/// -/// * `policy` - The retry policy configuration. -/// * `operation_name` - The name of the operation being retried. -/// * `operation` - The operation to be retried. -/// -/// # Returns -/// -/// A `Result` containing the operation's result or an error if the maximum retries are reached. -pub(super) async fn retry_with_exponential_backoff( - policy: RetryPolicy, - operation_name: &str, - mut operation: F, -) -> Result -where - F: FnMut() -> Fut, - E: std::fmt::Debug, - Fut: Future>, -{ - let mut attempt = 0; - let mut delay = policy.initial_delay_ms; - - loop { - match operation().await { - Ok(result) => return Ok(result), // Return the result if the operation succeeds - Err(err) if attempt < policy.max_retries => { - attempt += 1; - // Log the error and retry after a delay with jitter - otel_warn!(name: "OtlpRetry", message = format!("Retrying operation {:?} due to error: {:?}", operation_name, err)); - let jitter = generate_jitter(policy.jitter_ms); - let delay_with_jitter = std::cmp::min(delay + jitter, policy.max_delay_ms); - - // Retry currently only supports tokio::time::sleep (for use with gRPC/tonic). This - // should be replaced with a more generic sleep function that works with async-std - // and a synchronous runtime in the future. 
- tokio::time::sleep(Duration::from_millis(delay_with_jitter)).await; - - delay = std::cmp::min(delay * 2, policy.max_delay_ms); // Exponential backoff - } - Err(err) => return Err(err), // Return the error if max retries are reached - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use tokio::time::timeout; - use std::sync::atomic::{AtomicUsize, Ordering}; - use std::time::Duration; - - // Test to ensure generate_jitter returns a value within the expected range - #[tokio::test] - async fn test_generate_jitter() { - let max_jitter = 100; - let jitter = generate_jitter(max_jitter); - assert!(jitter <= max_jitter); - } - - // Test to ensure retry_with_exponential_backoff succeeds on the first attempt - #[tokio::test] - async fn test_retry_with_exponential_backoff_success() { - let policy = RetryPolicy { - max_retries: 3, - initial_delay_ms: 100, - max_delay_ms: 1600, - jitter_ms: 100, - }; - - let result = retry_with_exponential_backoff(policy, "test_operation", || { - Box::pin(async { Ok::<_, ()>("success") }) - }).await; - - assert_eq!(result, Ok("success")); - } - - // Test to ensure retry_with_exponential_backoff retries the operation and eventually succeeds - #[tokio::test] - async fn test_retry_with_exponential_backoff_retries() { - let policy = RetryPolicy { - max_retries: 3, - initial_delay_ms: 100, - max_delay_ms: 1600, - jitter_ms: 100, - }; - - let attempts = AtomicUsize::new(0); - - let result = retry_with_exponential_backoff(policy, "test_operation", || { - let attempt = attempts.fetch_add(1, Ordering::SeqCst); - Box::pin(async move { - if attempt < 2 { - Err::<&str, &str>("error") // Fail the first two attempts - } else { - Ok::<&str, &str>("success") // Succeed on the third attempt - } - }) - }).await; - - assert_eq!(result, Ok("success")); - assert_eq!(attempts.load(Ordering::SeqCst), 3); // Ensure there were 3 attempts - } - - // Test to ensure retry_with_exponential_backoff fails after max retries - #[tokio::test] - async fn test_retry_with_exponential_backoff_failure() { - let policy = RetryPolicy { - max_retries: 3, - initial_delay_ms: 100, - max_delay_ms: 1600, - jitter_ms: 100, - }; - - let attempts = AtomicUsize::new(0); - - let result = retry_with_exponential_backoff(policy, "test_operation", || { - attempts.fetch_add(1, Ordering::SeqCst); - Box::pin(async { Err::<(), _>("error") }) // Always fail - }).await; - - assert_eq!(result, Err("error")); - assert_eq!(attempts.load(Ordering::SeqCst), 4); // Ensure there were 4 attempts (initial + 3 retries) - } - - // Test to ensure retry_with_exponential_backoff respects the timeout - #[tokio::test] - async fn test_retry_with_exponential_backoff_timeout() { - let policy = RetryPolicy { - max_retries: 12, // Increase the number of retries - initial_delay_ms: 100, - max_delay_ms: 1600, - jitter_ms: 100, - }; - - let result = timeout(Duration::from_secs(1), retry_with_exponential_backoff(policy, "test_operation", || { - Box::pin(async { Err::<(), _>("error") }) // Always fail - })).await; - - assert!(result.is_err()); // Ensure the operation times out - } -} \ No newline at end of file diff --git a/opentelemetry-sdk/src/retry.rs b/opentelemetry-sdk/src/retry.rs index c65247c6ec..59e9fc0c0a 100644 --- a/opentelemetry-sdk/src/retry.rs +++ b/opentelemetry-sdk/src/retry.rs @@ -3,18 +3,20 @@ //! The `RetryPolicy` struct defines the configuration for the retry behavior, including the maximum //! number of retries, initial delay, maximum delay, and jitter. //! -//! 
The `Sleep` trait abstracts the sleep functionality, allowing different implementations for -//! various async runtimes such as Tokio and async-std, as well as a synchronous implementation. -//! //! The `retry_with_exponential_backoff` function retries the given operation according to the //! specified retry policy, using exponential backoff and jitter to determine the delay between //! retries. The function logs errors and retries the operation until it succeeds or the maximum //! number of retries is reached. +#[cfg(feature = "experimental_async_runtime")] +use opentelemetry::otel_warn; +#[cfg(feature = "experimental_async_runtime")] use std::future::Future; -use std::pin::Pin; +#[cfg(feature = "experimental_async_runtime")] use std::time::{Duration, SystemTime}; -use opentelemetry::otel_warn; + +#[cfg(feature = "experimental_async_runtime")] +use crate::runtime::Runtime; /// Configuration for retry policy. #[derive(Debug)] @@ -29,67 +31,36 @@ pub struct RetryPolicy { pub jitter_ms: u64, } +/// A runtime stub for when experimental_async_runtime is not enabled. +/// This allows retry policy to be configured but no actual retries occur. +#[cfg(not(feature = "experimental_async_runtime"))] +#[derive(Debug, Clone)] +pub struct NoOpRuntime; + +#[cfg(not(feature = "experimental_async_runtime"))] +impl NoOpRuntime { + /// Creates a new no-op runtime. + pub fn new() -> Self { + Self + } +} + // Generates a random jitter value up to max_jitter +#[cfg(feature = "experimental_async_runtime")] fn generate_jitter(max_jitter: u64) -> u64 { let now = SystemTime::now(); - let nanos = now.duration_since(SystemTime::UNIX_EPOCH).unwrap().subsec_nanos(); + let nanos = now + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .subsec_nanos(); nanos as u64 % (max_jitter + 1) } -/// Trait to abstract the sleep functionality. -pub trait Sleep { - /// The future returned by the sleep function. - type SleepFuture: Future; - - /// Sleeps for the specified duration. - fn sleep(duration: Duration) -> Self::SleepFuture; -} - -/// Implementation of the Sleep trait for tokio::time::Sleep -#[cfg(feature = "rt-tokio")] -impl Sleep for tokio::time::Sleep { - type SleepFuture = tokio::time::Sleep; - - fn sleep(duration: Duration) -> Self::SleepFuture { - tokio::time::sleep(duration) - } -} - -#[cfg(feature = "rt-async-std")] -/// There is no direct equivalent to `tokio::time::Sleep` in `async-std`. -/// Instead, we create a new struct `AsyncStdSleep` and implement the `Sleep` -/// trait for it, boxing the future returned by `async_std::task::sleep` to fit -/// the trait's associated type requirements. -#[derive(Debug)] -pub struct AsyncStdSleep; - -/// Implementation of the Sleep trait for async-std -#[cfg(feature = "rt-async-std")] -impl Sleep for AsyncStdSleep { - type SleepFuture = Pin + Send>>; - - fn sleep(duration: Duration) -> Self::SleepFuture { - Box::pin(async_std::task::sleep(duration)) - } -} - -/// Implement the Sleep trait for synchronous sleep -#[derive(Debug)] -pub struct StdSleep; - -impl Sleep for StdSleep { - type SleepFuture = std::future::Ready<()>; - - fn sleep(duration: Duration) -> Self::SleepFuture { - std::thread::sleep(duration); - std::future::ready(()) - } -} - /// Retries the given operation with exponential backoff and jitter. /// /// # Arguments /// +/// * `runtime` - The async runtime to use for delays. /// * `policy` - The retry policy configuration. /// * `operation_name` - The name of the operation being retried. /// * `operation` - The operation to be retried. 
@@ -97,16 +68,18 @@ impl Sleep for StdSleep { /// # Returns /// /// A `Result` containing the operation's result or an error if the maximum retries are reached. -pub async fn retry_with_exponential_backoff( +#[cfg(feature = "experimental_async_runtime")] +pub async fn retry_with_exponential_backoff( + runtime: R, policy: RetryPolicy, operation_name: &str, mut operation: F, ) -> Result where + R: Runtime, F: FnMut() -> Fut, E: std::fmt::Debug, Fut: Future>, - S: Sleep, { let mut attempt = 0; let mut delay = policy.initial_delay_ms; @@ -120,7 +93,9 @@ where otel_warn!(name: "OtlpRetry", message = format!("Retrying operation {:?} due to error: {:?}", operation_name, err)); let jitter = generate_jitter(policy.jitter_ms); let delay_with_jitter = std::cmp::min(delay + jitter, policy.max_delay_ms); - S::sleep(Duration::from_millis(delay_with_jitter)).await; + runtime + .delay(Duration::from_millis(delay_with_jitter)) + .await; delay = std::cmp::min(delay * 2, policy.max_delay_ms); // Exponential backoff } Err(err) => return Err(err), // Return the error if max retries are reached @@ -128,12 +103,30 @@ where } } -#[cfg(test)] +/// No-op retry function for when experimental_async_runtime is not enabled. +/// This function will execute the operation exactly once without any retries. +#[cfg(not(feature = "experimental_async_runtime"))] +pub async fn retry_with_exponential_backoff( + _runtime: R, + _policy: RetryPolicy, + _operation_name: &str, + mut operation: F, +) -> Result +where + F: FnMut() -> Fut, + Fut: std::future::Future>, +{ + // Without experimental_async_runtime, we just execute once without retries + operation().await +} + +#[cfg(all(test, feature = "experimental_async_runtime", feature = "rt-tokio"))] mod tests { use super::*; - use tokio::time::timeout; + use crate::runtime::Tokio; use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Duration; + use tokio::time::timeout; // Test to ensure generate_jitter returns a value within the expected range #[tokio::test] @@ -146,6 +139,7 @@ mod tests { // Test to ensure retry_with_exponential_backoff succeeds on the first attempt #[tokio::test] async fn test_retry_with_exponential_backoff_success() { + let runtime = Tokio; let policy = RetryPolicy { max_retries: 3, initial_delay_ms: 100, @@ -153,9 +147,10 @@ mod tests { jitter_ms: 100, }; - let result = retry_with_exponential_backoff::<_, _, _, _, tokio::time::Sleep>(policy, "test_operation", || { + let result = retry_with_exponential_backoff(runtime, policy, "test_operation", || { Box::pin(async { Ok::<_, ()>("success") }) - }).await; + }) + .await; assert_eq!(result, Ok("success")); } @@ -163,6 +158,7 @@ mod tests { // Test to ensure retry_with_exponential_backoff retries the operation and eventually succeeds #[tokio::test] async fn test_retry_with_exponential_backoff_retries() { + let runtime = Tokio; let policy = RetryPolicy { max_retries: 3, initial_delay_ms: 100, @@ -172,7 +168,7 @@ mod tests { let attempts = AtomicUsize::new(0); - let result = retry_with_exponential_backoff::<_, _, _, _, tokio::time::Sleep>(policy, "test_operation", || { + let result = retry_with_exponential_backoff(runtime, policy, "test_operation", || { let attempt = attempts.fetch_add(1, Ordering::SeqCst); Box::pin(async move { if attempt < 2 { @@ -181,7 +177,8 @@ mod tests { Ok::<&str, &str>("success") // Succeed on the third attempt } }) - }).await; + }) + .await; assert_eq!(result, Ok("success")); assert_eq!(attempts.load(Ordering::SeqCst), 3); // Ensure there were 3 attempts @@ -190,6 +187,7 @@ mod tests { 
// Test to ensure retry_with_exponential_backoff fails after max retries #[tokio::test] async fn test_retry_with_exponential_backoff_failure() { + let runtime = Tokio; let policy = RetryPolicy { max_retries: 3, initial_delay_ms: 100, @@ -199,10 +197,11 @@ mod tests { let attempts = AtomicUsize::new(0); - let result = retry_with_exponential_backoff::<_, _, _, _, tokio::time::Sleep>(policy, "test_operation", || { + let result = retry_with_exponential_backoff(runtime, policy, "test_operation", || { attempts.fetch_add(1, Ordering::SeqCst); Box::pin(async { Err::<(), _>("error") }) // Always fail - }).await; + }) + .await; assert_eq!(result, Err("error")); assert_eq!(attempts.load(Ordering::SeqCst), 4); // Ensure there were 4 attempts (initial + 3 retries) @@ -211,6 +210,7 @@ mod tests { // Test to ensure retry_with_exponential_backoff respects the timeout #[tokio::test] async fn test_retry_with_exponential_backoff_timeout() { + let runtime = Tokio; let policy = RetryPolicy { max_retries: 12, // Increase the number of retries initial_delay_ms: 100, @@ -218,10 +218,14 @@ mod tests { jitter_ms: 100, }; - let result = timeout(Duration::from_secs(1), retry_with_exponential_backoff::<_, _, _, _, tokio::time::Sleep>(policy, "test_operation", || { - Box::pin(async { Err::<(), _>("error") }) // Always fail - })).await; + let result = timeout( + Duration::from_secs(1), + retry_with_exponential_backoff(runtime, policy, "test_operation", || { + Box::pin(async { Err::<(), _>("error") }) // Always fail + }), + ) + .await; assert!(result.is_err()); // Ensure the operation times out } -} \ No newline at end of file +} From ca457ba81e2c5578b39cadbc892e32997ecbeeb5 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Tue, 12 Aug 2025 09:54:57 +0200 Subject: [PATCH 10/27] add retry to traces --- opentelemetry-otlp/src/exporter/tonic/logs.rs | 2 +- .../src/exporter/tonic/trace.rs | 86 ++++++++++++------- opentelemetry-sdk/src/retry.rs | 2 +- 3 files changed, 57 insertions(+), 33 deletions(-) diff --git a/opentelemetry-otlp/src/exporter/tonic/logs.rs b/opentelemetry-otlp/src/exporter/tonic/logs.rs index 62d58548f0..29946128ce 100644 --- a/opentelemetry-otlp/src/exporter/tonic/logs.rs +++ b/opentelemetry-otlp/src/exporter/tonic/logs.rs @@ -92,7 +92,7 @@ impl LogExporter for TonicLogsClient { None => return Err(OTelSdkError::AlreadyShutdown), }; - let resource_logs = group_logs_by_resource_and_scope(&*batch, resource); + let resource_logs = group_logs_by_resource_and_scope(&batch, resource); otel_debug!(name: "TonicLogsClient.ExportStarted"); diff --git a/opentelemetry-otlp/src/exporter/tonic/trace.rs b/opentelemetry-otlp/src/exporter/tonic/trace.rs index 4378c37a04..2ce8b884a3 100644 --- a/opentelemetry-otlp/src/exporter/tonic/trace.rs +++ b/opentelemetry-otlp/src/exporter/tonic/trace.rs @@ -1,4 +1,5 @@ use core::fmt; +use std::sync::Arc; use tokio::sync::Mutex; use opentelemetry::otel_debug; @@ -15,6 +16,9 @@ use tonic::{codegen::CompressionEncoding, service::Interceptor, transport::Chann use super::BoxInterceptor; +use opentelemetry_sdk::retry::{retry_with_exponential_backoff, RetryPolicy}; +use opentelemetry_sdk::runtime::Tokio; + pub(crate) struct TonicTracesClient { inner: Option, #[allow(dead_code)] @@ -60,43 +64,63 @@ impl TonicTracesClient { impl SpanExporter for TonicTracesClient { async fn export(&self, batch: Vec) -> OTelSdkResult { - let (mut client, metadata, extensions) = match &self.inner { - Some(inner) => { - let (m, e, _) = inner - .interceptor - .lock() - .await // tokio::sync::Mutex doesn't return 
a poisoned error, so we can safely use the interceptor here - .call(Request::new(())) - .map_err(|e| OTelSdkError::InternalFailure(format!("error: {e:?}")))? - .into_parts(); - (inner.client.clone(), m, e) - } - None => return Err(OTelSdkError::AlreadyShutdown), + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, }; - let resource_spans = group_spans_by_resource_and_scope(batch, &self.resource); + let batch = Arc::new(batch); - otel_debug!(name: "TonicTracesClient.ExportStarted"); + retry_with_exponential_backoff(Tokio, policy, "TonicTracesClient.Export", { + let batch = Arc::clone(&batch); + let inner = &self.inner; + let resource = &self.resource; + move || { + let batch = Arc::clone(&batch); + Box::pin(async move { + let (mut client, metadata, extensions) = match inner { + Some(inner) => { + let (m, e, _) = inner + .interceptor + .lock() + .await // tokio::sync::Mutex doesn't return a poisoned error, so we can safely use the interceptor here + .call(Request::new(())) + .map_err(|e| OTelSdkError::InternalFailure(format!("error: {e:?}")))? + .into_parts(); + (inner.client.clone(), m, e) + } + None => return Err(OTelSdkError::AlreadyShutdown), + }; - let result = client - .export(Request::from_parts( - metadata, - extensions, - ExportTraceServiceRequest { resource_spans }, - )) - .await; + let resource_spans = group_spans_by_resource_and_scope((*batch).clone(), resource); - match result { - Ok(_) => { - otel_debug!(name: "TonicTracesClient.ExportSucceeded"); - Ok(()) - } - Err(e) => { - let error = e.to_string(); - otel_debug!(name: "TonicTracesClient.ExportFailed", error = &error); - Err(OTelSdkError::InternalFailure(error)) + otel_debug!(name: "TonicTracesClient.ExportStarted"); + + let result = client + .export(Request::from_parts( + metadata, + extensions, + ExportTraceServiceRequest { resource_spans }, + )) + .await; + + match result { + Ok(_) => { + otel_debug!(name: "TonicTracesClient.ExportSucceeded"); + Ok(()) + } + Err(e) => { + let error = format!("export error: {e:?}"); + otel_debug!(name: "TonicTracesClient.ExportFailed", error = &error); + Err(OTelSdkError::InternalFailure(error)) + } + } + }) } - } + }) + .await } fn shutdown(&mut self) -> OTelSdkResult { diff --git a/opentelemetry-sdk/src/retry.rs b/opentelemetry-sdk/src/retry.rs index 59e9fc0c0a..1ff347b096 100644 --- a/opentelemetry-sdk/src/retry.rs +++ b/opentelemetry-sdk/src/retry.rs @@ -34,7 +34,7 @@ pub struct RetryPolicy { /// A runtime stub for when experimental_async_runtime is not enabled. /// This allows retry policy to be configured but no actual retries occur. 
#[cfg(not(feature = "experimental_async_runtime"))] -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct NoOpRuntime; #[cfg(not(feature = "experimental_async_runtime"))] From 06d6effb0507dc950e21657954befaa790533adf Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Tue, 12 Aug 2025 10:02:52 +0200 Subject: [PATCH 11/27] add retry to metrics --- .../src/exporter/tonic/metrics.rs | 101 ++++++++++-------- 1 file changed, 59 insertions(+), 42 deletions(-) diff --git a/opentelemetry-otlp/src/exporter/tonic/metrics.rs b/opentelemetry-otlp/src/exporter/tonic/metrics.rs index 13813c7305..07926366e0 100644 --- a/opentelemetry-otlp/src/exporter/tonic/metrics.rs +++ b/opentelemetry-otlp/src/exporter/tonic/metrics.rs @@ -12,6 +12,9 @@ use tonic::{codegen::CompressionEncoding, service::Interceptor, transport::Chann use super::BoxInterceptor; use crate::metric::MetricsClient; +use opentelemetry_sdk::retry::{retry_with_exponential_backoff, RetryPolicy}; +use opentelemetry_sdk::runtime::Tokio; + pub(crate) struct TonicMetricsClient { inner: Mutex>, } @@ -53,49 +56,63 @@ impl TonicMetricsClient { impl MetricsClient for TonicMetricsClient { async fn export(&self, metrics: &ResourceMetrics) -> OTelSdkResult { - let (mut client, metadata, extensions) = self - .inner - .lock() - .map_err(|e| OTelSdkError::InternalFailure(format!("Failed to acquire lock: {e:?}"))) - .and_then(|mut inner| match &mut *inner { - Some(inner) => { - let (m, e, _) = inner - .interceptor - .call(Request::new(())) - .map_err(|e| { - OTelSdkError::InternalFailure(format!( - "unexpected status while exporting {e:?}" - )) - })? - .into_parts(); - Ok((inner.client.clone(), m, e)) - } - None => Err(OTelSdkError::InternalFailure( - "exporter is already shut down".into(), - )), - })?; - - otel_debug!(name: "TonicMetricsClient.ExportStarted"); - - let result = client - .export(Request::from_parts( - metadata, - extensions, - ExportMetricsServiceRequest::from(metrics), - )) - .await; - - match result { - Ok(_) => { - otel_debug!(name: "TonicMetricsClient.ExportSucceeded"); - Ok(()) - } - Err(e) => { - let error = format!("{e:?}"); - otel_debug!(name: "TonicMetricsClient.ExportFailed", error = &error); - Err(OTelSdkError::InternalFailure(error)) + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }; + + retry_with_exponential_backoff(Tokio, policy, "TonicMetricsClient.Export", { + let inner = &self.inner; + move || { + Box::pin(async move { + let (mut client, metadata, extensions) = inner + .lock() + .map_err(|e| OTelSdkError::InternalFailure(format!("Failed to acquire lock: {e:?}"))) + .and_then(|mut inner| match &mut *inner { + Some(inner) => { + let (m, e, _) = inner + .interceptor + .call(Request::new(())) + .map_err(|e| { + OTelSdkError::InternalFailure(format!( + "unexpected status while exporting {e:?}" + )) + })? 
+ .into_parts(); + Ok((inner.client.clone(), m, e)) + } + None => Err(OTelSdkError::InternalFailure( + "exporter is already shut down".into(), + )), + })?; + + otel_debug!(name: "TonicMetricsClient.ExportStarted"); + + let result = client + .export(Request::from_parts( + metadata, + extensions, + ExportMetricsServiceRequest::from(metrics), + )) + .await; + + match result { + Ok(_) => { + otel_debug!(name: "TonicMetricsClient.ExportSucceeded"); + Ok(()) + } + Err(e) => { + let error = format!("export error: {e:?}"); + otel_debug!(name: "TonicMetricsClient.ExportFailed", error = &error); + Err(OTelSdkError::InternalFailure(error)) + } + } + }) } - } + }) + .await } fn shutdown(&self) -> OTelSdkResult { From 93fc942baca705a1f73c86dff95a3c0861924129 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Tue, 12 Aug 2025 10:18:36 +0200 Subject: [PATCH 12/27] handle failure hints --- opentelemetry-otlp/Cargo.toml | 3 +- .../allowed-external-types.toml | 3 + opentelemetry-otlp/src/exporter/tonic/logs.rs | 96 ++-- .../src/exporter/tonic/metrics.rs | 99 ++-- .../src/exporter/tonic/trace.rs | 99 ++-- opentelemetry-otlp/src/lib.rs | 3 + .../src/retry_classification.rs | 480 ++++++++++++++++++ opentelemetry-sdk/src/retry.rs | 299 ++++++++++- 8 files changed, 928 insertions(+), 154 deletions(-) create mode 100644 opentelemetry-otlp/src/retry_classification.rs diff --git a/opentelemetry-otlp/Cargo.toml b/opentelemetry-otlp/Cargo.toml index 3d28ea9de2..4c70fe1205 100644 --- a/opentelemetry-otlp/Cargo.toml +++ b/opentelemetry-otlp/Cargo.toml @@ -35,6 +35,7 @@ tracing = {workspace = true, optional = true} prost = { workspace = true, optional = true } tonic = { workspace = true, optional = true } +tonic-types = { workspace = true, optional = true } tokio = { workspace = true, features = ["sync", "rt"], optional = true } reqwest = { workspace = true, optional = true } @@ -69,7 +70,7 @@ serialize = ["serde", "serde_json"] default = ["http-proto", "reqwest-blocking-client", "trace", "metrics", "logs", "internal-logs"] # grpc using tonic -grpc-tonic = ["tonic", "prost", "http", "tokio", "opentelemetry-proto/gen-tonic", "opentelemetry_sdk/rt-tokio", "opentelemetry_sdk/experimental_async_runtime"] +grpc-tonic = ["tonic", "tonic-types", "prost", "http", "tokio", "opentelemetry-proto/gen-tonic", "opentelemetry_sdk/rt-tokio", "opentelemetry_sdk/experimental_async_runtime"] gzip-tonic = ["tonic/gzip"] zstd-tonic = ["tonic/zstd"] diff --git a/opentelemetry-otlp/allowed-external-types.toml b/opentelemetry-otlp/allowed-external-types.toml index 435221a5cb..4e77afead3 100644 --- a/opentelemetry-otlp/allowed-external-types.toml +++ b/opentelemetry-otlp/allowed-external-types.toml @@ -18,4 +18,7 @@ allowed_external_types = [ "tonic::transport::tls::Identity", "tonic::transport::channel::Channel", "tonic::service::interceptor::Interceptor", + + # For retries + "tonic::status::Status" ] diff --git a/opentelemetry-otlp/src/exporter/tonic/logs.rs b/opentelemetry-otlp/src/exporter/tonic/logs.rs index 29946128ce..279ac72d84 100644 --- a/opentelemetry-otlp/src/exporter/tonic/logs.rs +++ b/opentelemetry-otlp/src/exporter/tonic/logs.rs @@ -14,7 +14,8 @@ use opentelemetry_proto::transform::logs::tonic::group_logs_by_resource_and_scop use super::BoxInterceptor; -use opentelemetry_sdk::retry::{retry_with_exponential_backoff, RetryPolicy}; +use crate::retry_classification::grpc::classify_tonic_status; +use opentelemetry_sdk::retry::{retry_with_exponential_backoff_classified, RetryPolicy}; use opentelemetry_sdk::runtime::Tokio; pub(crate) 
struct TonicLogsClient { @@ -71,54 +72,57 @@ impl LogExporter for TonicLogsClient { let batch = Arc::new(batch); - retry_with_exponential_backoff(Tokio, policy, "TonicLogsClient.Export", { - let batch = Arc::clone(&batch); - let inner = &self.inner; - let resource = &self.resource; - move || { - let batch = Arc::clone(&batch); - Box::pin(async move { - let (mut client, metadata, extensions) = match inner.lock().await.as_mut() { - Some(inner) => { - let (m, e, _) = inner - .interceptor - .call(Request::new(())) - .map_err(|e| { - OTelSdkError::InternalFailure(format!("error: {e:?}")) - })? - .into_parts(); - (inner.client.clone(), m, e) - } - None => return Err(OTelSdkError::AlreadyShutdown), - }; - - let resource_logs = group_logs_by_resource_and_scope(&batch, resource); - - otel_debug!(name: "TonicLogsClient.ExportStarted"); - - let result = client - .export(Request::from_parts( - metadata, - extensions, - ExportLogsServiceRequest { resource_logs }, + match retry_with_exponential_backoff_classified( + Tokio, + policy, + classify_tonic_status, + "TonicLogsClient.Export", + || async { + let batch_clone = Arc::clone(&batch); + + // Execute the export operation + let (mut client, metadata, extensions) = match self.inner.lock().await.as_mut() { + Some(inner) => { + let (m, e, _) = inner + .interceptor + .call(Request::new(())) + .map_err(|e| { + // Convert interceptor errors to tonic::Status for retry classification + tonic::Status::internal(format!("interceptor error: {e:?}")) + })? + .into_parts(); + (inner.client.clone(), m, e) + } + None => { + return Err(tonic::Status::failed_precondition( + "exporter already shutdown", )) - .await; - - match result { - Ok(_) => { - otel_debug!(name: "TonicLogsClient.ExportSucceeded"); - Ok(()) - } - Err(e) => { - let error = format!("export error: {e:?}"); - otel_debug!(name: "TonicLogsClient.ExportFailed", error = &error); - Err(OTelSdkError::InternalFailure(error)) - } } - }) - } - }) + }; + + let resource_logs = group_logs_by_resource_and_scope(&batch_clone, &self.resource); + + otel_debug!(name: "TonicLogsClient.ExportStarted"); + + client + .export(Request::from_parts( + metadata, + extensions, + ExportLogsServiceRequest { resource_logs }, + )) + .await + .map(|_| { + otel_debug!(name: "TonicLogsClient.ExportSucceeded"); + }) + }, + ) .await + { + Ok(_) => Ok(()), + Err(tonic_status) => Err(OTelSdkError::InternalFailure(format!( + "export error: {tonic_status:?}" + ))), + } } fn shutdown_with_timeout(&self, _timeout: time::Duration) -> OTelSdkResult { diff --git a/opentelemetry-otlp/src/exporter/tonic/metrics.rs b/opentelemetry-otlp/src/exporter/tonic/metrics.rs index 07926366e0..30a3db1ce0 100644 --- a/opentelemetry-otlp/src/exporter/tonic/metrics.rs +++ b/opentelemetry-otlp/src/exporter/tonic/metrics.rs @@ -12,7 +12,8 @@ use tonic::{codegen::CompressionEncoding, service::Interceptor, transport::Chann use super::BoxInterceptor; use crate::metric::MetricsClient; -use opentelemetry_sdk::retry::{retry_with_exponential_backoff, RetryPolicy}; +use crate::retry_classification::grpc::classify_tonic_status; +use opentelemetry_sdk::retry::{retry_with_exponential_backoff_classified, RetryPolicy}; use opentelemetry_sdk::runtime::Tokio; pub(crate) struct TonicMetricsClient { @@ -63,56 +64,56 @@ impl MetricsClient for TonicMetricsClient { jitter_ms: 100, }; - retry_with_exponential_backoff(Tokio, policy, "TonicMetricsClient.Export", { - let inner = &self.inner; - move || { - Box::pin(async move { - let (mut client, metadata, extensions) = inner - .lock() - 
.map_err(|e| OTelSdkError::InternalFailure(format!("Failed to acquire lock: {e:?}"))) - .and_then(|mut inner| match &mut *inner { - Some(inner) => { - let (m, e, _) = inner - .interceptor - .call(Request::new(())) - .map_err(|e| { - OTelSdkError::InternalFailure(format!( - "unexpected status while exporting {e:?}" - )) - })? - .into_parts(); - Ok((inner.client.clone(), m, e)) - } - None => Err(OTelSdkError::InternalFailure( - "exporter is already shut down".into(), - )), - })?; - - otel_debug!(name: "TonicMetricsClient.ExportStarted"); - - let result = client - .export(Request::from_parts( - metadata, - extensions, - ExportMetricsServiceRequest::from(metrics), - )) - .await; - - match result { - Ok(_) => { - otel_debug!(name: "TonicMetricsClient.ExportSucceeded"); - Ok(()) + match retry_with_exponential_backoff_classified( + Tokio, + policy, + classify_tonic_status, + "TonicMetricsClient.Export", + || async { + // Execute the export operation + let (mut client, metadata, extensions) = self + .inner + .lock() + .map_err(|e| tonic::Status::internal(format!("Failed to acquire lock: {e:?}"))) + .and_then(|mut inner| match &mut *inner { + Some(inner) => { + let (m, e, _) = inner + .interceptor + .call(Request::new(())) + .map_err(|e| { + tonic::Status::internal(format!( + "unexpected status while exporting {e:?}" + )) + })? + .into_parts(); + Ok((inner.client.clone(), m, e)) } - Err(e) => { - let error = format!("export error: {e:?}"); - otel_debug!(name: "TonicMetricsClient.ExportFailed", error = &error); - Err(OTelSdkError::InternalFailure(error)) - } - } - }) - } - }) + None => Err(tonic::Status::failed_precondition( + "exporter is already shut down", + )), + })?; + + otel_debug!(name: "TonicMetricsClient.ExportStarted"); + + client + .export(Request::from_parts( + metadata, + extensions, + ExportMetricsServiceRequest::from(metrics), + )) + .await + .map(|_| { + otel_debug!(name: "TonicMetricsClient.ExportSucceeded"); + }) + }, + ) .await + { + Ok(_) => Ok(()), + Err(tonic_status) => Err(OTelSdkError::InternalFailure(format!( + "export error: {tonic_status:?}" + ))), + } } fn shutdown(&self) -> OTelSdkResult { diff --git a/opentelemetry-otlp/src/exporter/tonic/trace.rs b/opentelemetry-otlp/src/exporter/tonic/trace.rs index 2ce8b884a3..5cc27ab188 100644 --- a/opentelemetry-otlp/src/exporter/tonic/trace.rs +++ b/opentelemetry-otlp/src/exporter/tonic/trace.rs @@ -16,7 +16,8 @@ use tonic::{codegen::CompressionEncoding, service::Interceptor, transport::Chann use super::BoxInterceptor; -use opentelemetry_sdk::retry::{retry_with_exponential_backoff, RetryPolicy}; +use crate::retry_classification::grpc::classify_tonic_status; +use opentelemetry_sdk::retry::{retry_with_exponential_backoff_classified, RetryPolicy}; use opentelemetry_sdk::runtime::Tokio; pub(crate) struct TonicTracesClient { @@ -73,54 +74,60 @@ impl SpanExporter for TonicTracesClient { let batch = Arc::new(batch); - retry_with_exponential_backoff(Tokio, policy, "TonicTracesClient.Export", { - let batch = Arc::clone(&batch); - let inner = &self.inner; - let resource = &self.resource; - move || { - let batch = Arc::clone(&batch); - Box::pin(async move { - let (mut client, metadata, extensions) = match inner { - Some(inner) => { - let (m, e, _) = inner - .interceptor - .lock() - .await // tokio::sync::Mutex doesn't return a poisoned error, so we can safely use the interceptor here - .call(Request::new(())) - .map_err(|e| OTelSdkError::InternalFailure(format!("error: {e:?}")))? 
- .into_parts(); - (inner.client.clone(), m, e) - } - None => return Err(OTelSdkError::AlreadyShutdown), - }; - - let resource_spans = group_spans_by_resource_and_scope((*batch).clone(), resource); - - otel_debug!(name: "TonicTracesClient.ExportStarted"); - - let result = client - .export(Request::from_parts( - metadata, - extensions, - ExportTraceServiceRequest { resource_spans }, + match retry_with_exponential_backoff_classified( + Tokio, + policy, + classify_tonic_status, + "TonicTracesClient.Export", + || async { + let batch_clone = Arc::clone(&batch); + + // Execute the export operation + let (mut client, metadata, extensions) = match &self.inner { + Some(inner) => { + let (m, e, _) = inner + .interceptor + .lock() + .await // tokio::sync::Mutex doesn't return a poisoned error, so we can safely use the interceptor here + .call(Request::new(())) + .map_err(|e| { + // Convert interceptor errors to tonic::Status for retry classification + tonic::Status::internal(format!("interceptor error: {e:?}")) + })? + .into_parts(); + (inner.client.clone(), m, e) + } + None => { + return Err(tonic::Status::failed_precondition( + "exporter already shutdown", )) - .await; - - match result { - Ok(_) => { - otel_debug!(name: "TonicTracesClient.ExportSucceeded"); - Ok(()) - } - Err(e) => { - let error = format!("export error: {e:?}"); - otel_debug!(name: "TonicTracesClient.ExportFailed", error = &error); - Err(OTelSdkError::InternalFailure(error)) - } } - }) - } - }) + }; + + let resource_spans = + group_spans_by_resource_and_scope((*batch_clone).clone(), &self.resource); + + otel_debug!(name: "TonicTracesClient.ExportStarted"); + + client + .export(Request::from_parts( + metadata, + extensions, + ExportTraceServiceRequest { resource_spans }, + )) + .await + .map(|_| { + otel_debug!(name: "TonicTracesClient.ExportSucceeded"); + }) + }, + ) .await + { + Ok(_) => Ok(()), + Err(tonic_status) => Err(OTelSdkError::InternalFailure(format!( + "export error: {tonic_status:?}" + ))), + } } fn shutdown(&mut self) -> OTelSdkResult { diff --git a/opentelemetry-otlp/src/lib.rs b/opentelemetry-otlp/src/lib.rs index 3474b2b590..9e21ffe6b4 100644 --- a/opentelemetry-otlp/src/lib.rs +++ b/opentelemetry-otlp/src/lib.rs @@ -366,6 +366,9 @@ mod metric; #[cfg(any(feature = "http-proto", feature = "http-json", feature = "grpc-tonic"))] mod span; +#[cfg(feature = "grpc-tonic")] +pub mod retry_classification; + pub use crate::exporter::Compression; pub use crate::exporter::ExportConfig; pub use crate::exporter::ExporterBuildError; diff --git a/opentelemetry-otlp/src/retry_classification.rs b/opentelemetry-otlp/src/retry_classification.rs new file mode 100644 index 0000000000..63a395bc3e --- /dev/null +++ b/opentelemetry-otlp/src/retry_classification.rs @@ -0,0 +1,480 @@ +//! Error classification for OTLP exporters with protocol-specific throttling support. +//! +//! This module provides error classification functions for HTTP and gRPC protocols, +//! supporting server-provided throttling hints like HTTP Retry-After headers and +//! gRPC RetryInfo metadata. + +use opentelemetry_sdk::retry::RetryErrorType; +use std::time::Duration; + +#[cfg(feature = "grpc-tonic")] +use tonic; + +#[cfg(feature = "grpc-tonic")] +use tonic_types::StatusExt; + +/// HTTP-specific error classification with Retry-After header support. +pub mod http { + use super::*; + + /// Classifies HTTP errors based on status code and headers. 
+ /// + /// # Arguments + /// * `status_code` - HTTP status code + /// * `retry_after_header` - Value of the Retry-After header, if present + /// + /// # Retry-After Header Formats + /// * Seconds: "120" + /// * HTTP Date: "Fri, 31 Dec 1999 23:59:59 GMT" + pub fn classify_http_error( + status_code: u16, + retry_after_header: Option<&str>, + ) -> RetryErrorType { + match status_code { + // 429 Too Many Requests - check for Retry-After + 429 => { + if let Some(retry_after) = retry_after_header { + if let Some(duration) = parse_retry_after(retry_after) { + return RetryErrorType::Throttled(duration); + } + } + // Fallback to retryable if no valid Retry-After + RetryErrorType::Retryable + } + // 5xx Server errors - retryable + 500..=599 => RetryErrorType::Retryable, + // 4xx Client errors (except 429) - not retryable + 400..=499 => RetryErrorType::NonRetryable, + // Other codes - retryable (network issues, etc.) + _ => RetryErrorType::Retryable, + } + } + + /// Parses the Retry-After header value. + /// + /// Supports both formats: + /// - Delay seconds: "120" + /// - HTTP date: "Fri, 31 Dec 1999 23:59:59 GMT" + /// + /// Returns None if parsing fails or delay is unreasonable. + fn parse_retry_after(retry_after: &str) -> Option { + // Try parsing as seconds first + if let Ok(seconds) = retry_after.trim().parse::() { + // Cap at 10 minutes for safety + let capped_seconds = seconds.min(600); + return Some(Duration::from_secs(capped_seconds)); + } + + // Try parsing as HTTP date + if let Ok(delay_seconds) = parse_http_date_to_delay(retry_after) { + // Cap at 10 minutes for safety + let capped_seconds = delay_seconds.min(600); + return Some(Duration::from_secs(capped_seconds)); + } + + None + } + + /// Parses HTTP date format and returns delay in seconds from now. + /// + /// This is a simplified parser for the most common HTTP date format. + /// In production, you might want to use a proper HTTP date parsing library. + fn parse_http_date_to_delay(date_str: &str) -> Result { + // For now, return error - would need proper HTTP date parsing + // This could be implemented with chrono or similar + let _ = date_str; + Err(()) + } +} + +/// gRPC-specific error classification with RetryInfo support. +pub mod grpc { + use super::*; + + /// Classifies a tonic::Status error + #[cfg(feature = "grpc-tonic")] + pub fn classify_tonic_status(status: &tonic::Status) -> RetryErrorType { + // Use tonic-types to extract RetryInfo - this is the proper way! + let retry_info_seconds = status + .get_details_retry_info() + .and_then(|retry_info| retry_info.retry_delay) + .map(|duration| duration.as_secs()); + + classify_grpc_error(status.code(), retry_info_seconds) + } + + /// Classifies gRPC errors based on status code and metadata. 
+ /// + /// Implements the OpenTelemetry OTLP specification for error handling: + /// https://opentelemetry.io/docs/specs/otlp/ + /// https://github.com/open-telemetry/opentelemetry-proto/blob/main/docs/specification.md#failures + /// + /// # Arguments + /// * `grpc_code` - gRPC status code as tonic::Code enum + /// * `retry_info_seconds` - Parsed retry delay from RetryInfo metadata, if present + fn classify_grpc_error( + grpc_code: tonic::Code, + retry_info_seconds: Option, + ) -> RetryErrorType { + match grpc_code { + // RESOURCE_EXHAUSTED: Special case per OTLP spec + // Retryable only if server provides RetryInfo indicating recovery is possible + tonic::Code::ResourceExhausted => { + if let Some(seconds) = retry_info_seconds { + // Server signals recovery is possible - use throttled retry + let capped_seconds = seconds.min(600); // Cap at 10 minutes for safety + return RetryErrorType::Throttled(std::time::Duration::from_secs( + capped_seconds, + )); + } + // No RetryInfo - treat as non-retryable per OTLP spec + RetryErrorType::NonRetryable + } + + // Retryable errors per OTLP specification + tonic::Code::Cancelled + | tonic::Code::DeadlineExceeded + | tonic::Code::Aborted + | tonic::Code::OutOfRange + | tonic::Code::Unavailable + | tonic::Code::DataLoss => RetryErrorType::Retryable, + + // Non-retryable errors per OTLP specification + tonic::Code::Unknown + | tonic::Code::InvalidArgument + | tonic::Code::NotFound + | tonic::Code::AlreadyExists + | tonic::Code::PermissionDenied + | tonic::Code::FailedPrecondition + | tonic::Code::Unimplemented + | tonic::Code::Internal + | tonic::Code::Unauthenticated => RetryErrorType::NonRetryable, + + // OK should never reach here in error scenarios, but handle gracefully + tonic::Code::Ok => RetryErrorType::NonRetryable, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // Tests for HTTP error classification + mod http_tests { + use super::*; + use crate::retry_classification::http::*; + + #[test] + fn test_http_429_with_retry_after_seconds() { + let result = classify_http_error(429, Some("30")); + assert_eq!(result, RetryErrorType::Throttled(Duration::from_secs(30))); + } + + #[test] + fn test_http_429_with_large_retry_after_capped() { + let result = classify_http_error(429, Some("900")); // 15 minutes + assert_eq!( + result, + RetryErrorType::Throttled(std::time::Duration::from_secs(600)) + ); // Capped at 10 minutes + } + + #[test] + fn test_http_429_with_invalid_retry_after() { + let result = classify_http_error(429, Some("invalid")); + assert_eq!(result, RetryErrorType::Retryable); // Fallback + } + + #[test] + fn test_http_429_without_retry_after() { + let result = classify_http_error(429, None); + assert_eq!(result, RetryErrorType::Retryable); // Fallback + } + + #[test] + fn test_http_5xx_errors() { + assert_eq!(classify_http_error(500, None), RetryErrorType::Retryable); + assert_eq!(classify_http_error(502, None), RetryErrorType::Retryable); + assert_eq!(classify_http_error(503, None), RetryErrorType::Retryable); + assert_eq!(classify_http_error(599, None), RetryErrorType::Retryable); + } + + #[test] + fn test_http_4xx_errors() { + assert_eq!(classify_http_error(400, None), RetryErrorType::NonRetryable); + assert_eq!(classify_http_error(401, None), RetryErrorType::NonRetryable); + assert_eq!(classify_http_error(403, None), RetryErrorType::NonRetryable); + assert_eq!(classify_http_error(404, None), RetryErrorType::NonRetryable); + assert_eq!(classify_http_error(499, None), RetryErrorType::NonRetryable); + } + + #[test] + fn 
test_http_other_errors() { + assert_eq!(classify_http_error(100, None), RetryErrorType::Retryable); + assert_eq!(classify_http_error(200, None), RetryErrorType::Retryable); + assert_eq!(classify_http_error(300, None), RetryErrorType::Retryable); + } + } + + // Tests for gRPC error classification using public interface + #[cfg(feature = "grpc-tonic")] + mod grpc_tests { + use crate::retry_classification::grpc::classify_tonic_status; + use opentelemetry_sdk::retry::RetryErrorType; + use tonic_types::{ErrorDetails, StatusExt}; + + #[test] + fn test_grpc_resource_exhausted_with_retry_info() { + let error_details = + ErrorDetails::with_retry_info(Some(std::time::Duration::from_secs(45))); + let status = tonic::Status::with_error_details( + tonic::Code::ResourceExhausted, + "rate limited", + error_details, + ); + let result = classify_tonic_status(&status); + assert_eq!( + result, + RetryErrorType::Throttled(std::time::Duration::from_secs(45)) + ); + } + + #[test] + fn test_grpc_resource_exhausted_with_large_retry_info_capped() { + let error_details = + ErrorDetails::with_retry_info(Some(std::time::Duration::from_secs(900))); // 15 minutes + let status = tonic::Status::with_error_details( + tonic::Code::ResourceExhausted, + "rate limited", + error_details, + ); + let result = classify_tonic_status(&status); + assert_eq!( + result, + RetryErrorType::Throttled(std::time::Duration::from_secs(600)) + ); // Capped at 10 minutes + } + + #[test] + fn test_grpc_resource_exhausted_without_retry_info() { + let status = tonic::Status::new(tonic::Code::ResourceExhausted, "rate limited"); + let result = classify_tonic_status(&status); + // Per OTLP spec: RESOURCE_EXHAUSTED without RetryInfo is non-retryable + assert_eq!(result, RetryErrorType::NonRetryable); + } + + #[test] + fn test_grpc_retryable_errors() { + // Test all retryable errors per OTLP specification + let cancelled = tonic::Status::new(tonic::Code::Cancelled, "cancelled"); + assert_eq!(classify_tonic_status(&cancelled), RetryErrorType::Retryable); + + let deadline_exceeded = + tonic::Status::new(tonic::Code::DeadlineExceeded, "deadline exceeded"); + assert_eq!( + classify_tonic_status(&deadline_exceeded), + RetryErrorType::Retryable + ); + + let aborted = tonic::Status::new(tonic::Code::Aborted, "aborted"); + assert_eq!(classify_tonic_status(&aborted), RetryErrorType::Retryable); + + let out_of_range = tonic::Status::new(tonic::Code::OutOfRange, "out of range"); + assert_eq!( + classify_tonic_status(&out_of_range), + RetryErrorType::Retryable + ); + + let unavailable = tonic::Status::new(tonic::Code::Unavailable, "unavailable"); + assert_eq!( + classify_tonic_status(&unavailable), + RetryErrorType::Retryable + ); + + let data_loss = tonic::Status::new(tonic::Code::DataLoss, "data loss"); + assert_eq!(classify_tonic_status(&data_loss), RetryErrorType::Retryable); + } + + #[test] + fn test_grpc_non_retryable_errors() { + // Test all non-retryable errors per OTLP specification + let unknown = tonic::Status::new(tonic::Code::Unknown, "unknown"); + assert_eq!( + classify_tonic_status(&unknown), + RetryErrorType::NonRetryable + ); + + let invalid_argument = + tonic::Status::new(tonic::Code::InvalidArgument, "invalid argument"); + assert_eq!( + classify_tonic_status(&invalid_argument), + RetryErrorType::NonRetryable + ); + + let not_found = tonic::Status::new(tonic::Code::NotFound, "not found"); + assert_eq!( + classify_tonic_status(¬_found), + RetryErrorType::NonRetryable + ); + + let already_exists = tonic::Status::new(tonic::Code::AlreadyExists, 
"already exists"); + assert_eq!( + classify_tonic_status(&already_exists), + RetryErrorType::NonRetryable + ); + + let permission_denied = + tonic::Status::new(tonic::Code::PermissionDenied, "permission denied"); + assert_eq!( + classify_tonic_status(&permission_denied), + RetryErrorType::NonRetryable + ); + + let failed_precondition = + tonic::Status::new(tonic::Code::FailedPrecondition, "failed precondition"); + assert_eq!( + classify_tonic_status(&failed_precondition), + RetryErrorType::NonRetryable + ); + + let unimplemented = tonic::Status::new(tonic::Code::Unimplemented, "unimplemented"); + assert_eq!( + classify_tonic_status(&unimplemented), + RetryErrorType::NonRetryable + ); + + let internal = tonic::Status::new(tonic::Code::Internal, "internal error"); + assert_eq!( + classify_tonic_status(&internal), + RetryErrorType::NonRetryable + ); + + let unauthenticated = + tonic::Status::new(tonic::Code::Unauthenticated, "unauthenticated"); + assert_eq!( + classify_tonic_status(&unauthenticated), + RetryErrorType::NonRetryable + ); + } + + #[test] + fn test_grpc_ok_code_handled() { + // OK status should be handled gracefully (though unlikely in error scenarios) + let ok = tonic::Status::new(tonic::Code::Ok, "success"); + assert_eq!(classify_tonic_status(&ok), RetryErrorType::NonRetryable); + } + + // Tests for tonic-types RetryInfo integration + #[cfg(feature = "grpc-tonic")] + mod retry_info_tests { + use super::*; + use crate::retry_classification::grpc::classify_tonic_status; + use tonic_types::{ErrorDetails, StatusExt}; + + #[test] + fn test_classify_status_with_retry_info() { + // Create a tonic::Status with RetryInfo using proper StatusExt API + let error_details = + ErrorDetails::with_retry_info(Some(std::time::Duration::from_secs(30))); + let status = tonic::Status::with_error_details( + tonic::Code::ResourceExhausted, + "rate limited", + error_details, + ); + + // Test classification + let result = classify_tonic_status(&status); + assert_eq!( + result, + RetryErrorType::Throttled(std::time::Duration::from_secs(30)) + ); + } + + #[test] + fn test_classify_status_with_fractional_retry_info() { + // Create a tonic::Status with fractional seconds RetryInfo + let error_details = + ErrorDetails::with_retry_info(Some(std::time::Duration::from_millis(5500))); // 5.5 seconds + let status = tonic::Status::with_error_details( + tonic::Code::ResourceExhausted, + "rate limited", + error_details, + ); + + // Should use exact duration (5.5s = 5s) + let result = classify_tonic_status(&status); + assert_eq!( + result, + RetryErrorType::Throttled(std::time::Duration::from_secs(5)) + ); + } + + #[test] + fn test_classify_status_without_retry_info() { + // Status with resource_exhausted but no RetryInfo + let status = tonic::Status::new(tonic::Code::ResourceExhausted, "rate limited"); + + // Per OTLP spec: should be non-retryable without RetryInfo + let result = classify_tonic_status(&status); + assert_eq!(result, RetryErrorType::NonRetryable); + } + + #[test] + fn test_classify_status_non_retryable_error() { + // Status with non-retryable error code + let status = tonic::Status::new(tonic::Code::InvalidArgument, "bad request"); + + let result = classify_tonic_status(&status); + assert_eq!(result, RetryErrorType::NonRetryable); + } + + #[test] + fn test_classify_status_retryable_error() { + // Status with retryable error code + let status = tonic::Status::new(tonic::Code::Unavailable, "service unavailable"); + + let result = classify_tonic_status(&status); + assert_eq!(result, 
RetryErrorType::Retryable); + } + + #[test] + fn test_classify_status_large_retry_delay() { + // Test with large retry delay - should be capped at 10 minutes + let error_details = + ErrorDetails::with_retry_info(Some(std::time::Duration::from_secs(3600))); // 1 hour + let status = tonic::Status::with_error_details( + tonic::Code::ResourceExhausted, + "rate limited", + error_details, + ); + + let result = classify_tonic_status(&status); + // Should be capped at 10 minutes (600 seconds) + assert_eq!( + result, + RetryErrorType::Throttled(std::time::Duration::from_secs(600)) + ); + } + + #[test] + fn test_status_ext_get_details() { + // Test that StatusExt works correctly + let error_details = + ErrorDetails::with_retry_info(Some(std::time::Duration::from_secs(45))); + let status = tonic::Status::with_error_details( + tonic::Code::ResourceExhausted, + "rate limited", + error_details, + ); + + // Direct extraction should work + let extracted = status.get_details_retry_info(); + assert!(extracted.is_some()); + + let retry_delay = extracted.unwrap().retry_delay; + assert_eq!(retry_delay, Some(std::time::Duration::from_secs(45))); + } + } + } +} diff --git a/opentelemetry-sdk/src/retry.rs b/opentelemetry-sdk/src/retry.rs index 1ff347b096..a46e91f628 100644 --- a/opentelemetry-sdk/src/retry.rs +++ b/opentelemetry-sdk/src/retry.rs @@ -12,12 +12,25 @@ use opentelemetry::otel_warn; #[cfg(feature = "experimental_async_runtime")] use std::future::Future; +use std::time::Duration; #[cfg(feature = "experimental_async_runtime")] -use std::time::{Duration, SystemTime}; +use std::time::SystemTime; #[cfg(feature = "experimental_async_runtime")] use crate::runtime::Runtime; +/// Classification of errors for retry purposes. +#[derive(Debug, Clone, PartialEq)] +pub enum RetryErrorType { + /// Error is not retryable (e.g., authentication failure, bad request). + NonRetryable, + /// Error is retryable with exponential backoff (e.g., server error, network timeout). + Retryable, + /// Error indicates throttling - wait for the specified duration before retrying. + /// This overrides exponential backoff timing. + Throttled(Duration), +} + /// Configuration for retry policy. #[derive(Debug)] pub struct RetryPolicy { @@ -73,6 +86,50 @@ pub async fn retry_with_exponential_backoff( runtime: R, policy: RetryPolicy, operation_name: &str, + operation: F, +) -> Result +where + R: Runtime, + F: FnMut() -> Fut, + E: std::fmt::Debug, + Fut: Future>, +{ + // Use a simple classifier that treats all errors as retryable + let simple_classifier = |_: &E| RetryErrorType::Retryable; + + retry_with_exponential_backoff_classified( + runtime, + policy, + simple_classifier, + operation_name, + operation, + ) + .await +} + +/// Enhanced retry with exponential backoff, jitter, and error classification. +/// +/// This function provides sophisticated retry behavior by classifying errors +/// and honoring server-provided throttling hints (e.g., HTTP Retry-After, gRPC RetryInfo). +/// +/// # Arguments +/// +/// * `runtime` - The async runtime to use for delays. +/// * `policy` - The retry policy configuration. +/// * `error_classifier` - Function to classify errors for retry decisions. +/// * `operation_name` - The name of the operation being retried. +/// * `operation` - The operation to be retried. +/// +/// # Returns +/// +/// A `Result` containing the operation's result or an error if max retries are reached +/// or a non-retryable error occurs. 
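A minimal call-site sketch (the classifier, `MyError`, and `send_request` are illustrative placeholders, not part of this patch; the runtime, policy, and error types are the ones defined in this module):

    let policy = RetryPolicy {
        max_retries: 3,
        initial_delay_ms: 100,
        max_delay_ms: 1600,
        jitter_ms: 100,
    };
    // Map transport errors to retry decisions; throttled errors carry the server-provided delay.
    let classify = |e: &MyError| match e.retry_after() {
        Some(delay) => RetryErrorType::Throttled(delay),
        None if e.is_transient() => RetryErrorType::Retryable,
        None => RetryErrorType::NonRetryable,
    };
    let result = retry_with_exponential_backoff_classified(
        Tokio,
        policy,
        classify,
        "Example.Export",
        || async { send_request().await }, // returns Result<(), MyError>
    )
    .await;
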
+#[cfg(feature = "experimental_async_runtime")] +pub async fn retry_with_exponential_backoff_classified( + runtime: R, + policy: RetryPolicy, + error_classifier: C, + operation_name: &str, mut operation: F, ) -> Result where @@ -80,6 +137,7 @@ where F: FnMut() -> Fut, E: std::fmt::Debug, Fut: Future>, + C: Fn(&E) -> RetryErrorType, { let mut attempt = 0; let mut delay = policy.initial_delay_ms; @@ -87,18 +145,40 @@ where loop { match operation().await { Ok(result) => return Ok(result), // Return the result if the operation succeeds - Err(err) if attempt < policy.max_retries => { - attempt += 1; - // Log the error and retry after a delay with jitter - otel_warn!(name: "OtlpRetry", message = format!("Retrying operation {:?} due to error: {:?}", operation_name, err)); - let jitter = generate_jitter(policy.jitter_ms); - let delay_with_jitter = std::cmp::min(delay + jitter, policy.max_delay_ms); - runtime - .delay(Duration::from_millis(delay_with_jitter)) - .await; - delay = std::cmp::min(delay * 2, policy.max_delay_ms); // Exponential backoff + Err(err) => { + // Classify the error + let error_type = error_classifier(&err); + + match error_type { + RetryErrorType::NonRetryable => { + otel_warn!(name: "OtlpRetry", message = format!("Operation {:?} failed with non-retryable error: {:?}", operation_name, err)); + return Err(err); + } + RetryErrorType::Retryable if attempt < policy.max_retries => { + attempt += 1; + // Use exponential backoff with jitter + otel_warn!(name: "OtlpRetry", message = format!("Retrying operation {:?} due to retryable error: {:?}", operation_name, err)); + let jitter = generate_jitter(policy.jitter_ms); + let delay_with_jitter = std::cmp::min(delay + jitter, policy.max_delay_ms); + runtime + .delay(Duration::from_millis(delay_with_jitter)) + .await; + delay = std::cmp::min(delay * 2, policy.max_delay_ms); // Exponential backoff + } + RetryErrorType::Throttled(server_delay) if attempt < policy.max_retries => { + attempt += 1; + // Use server-specified delay (overrides exponential backoff) + otel_warn!(name: "OtlpRetry", message = format!("Retrying operation {:?} after server-specified throttling delay: {:?}", operation_name, server_delay)); + runtime.delay(server_delay).await; + // Don't update exponential backoff delay for next attempt since server provided specific timing + } + _ => { + // Max retries reached + otel_warn!(name: "OtlpRetry", message = format!("Operation {:?} failed after {} attempts: {:?}", operation_name, attempt, err)); + return Err(err); + } + } } - Err(err) => return Err(err), // Return the error if max retries are reached } } } @@ -228,4 +308,199 @@ mod tests { assert!(result.is_err()); // Ensure the operation times out } + + // Tests for error classification (Phase 1) + #[test] + fn test_retry_error_type_equality() { + assert_eq!(RetryErrorType::NonRetryable, RetryErrorType::NonRetryable); + assert_eq!(RetryErrorType::Retryable, RetryErrorType::Retryable); + assert_eq!( + RetryErrorType::Throttled(Duration::from_secs(30)), + RetryErrorType::Throttled(Duration::from_secs(30)) + ); + assert_ne!(RetryErrorType::Retryable, RetryErrorType::NonRetryable); + } + + // Tests for enhanced retry function (Phase 3) + #[tokio::test] + async fn test_retry_with_throttling_non_retryable_error() { + let runtime = Tokio; + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }; + + let attempts = AtomicUsize::new(0); + + // Classifier that returns non-retryable + let classifier = |_: &()| 
RetryErrorType::NonRetryable; + + let result = retry_with_exponential_backoff_classified( + runtime, + policy, + classifier, + "test_operation", + || { + attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async { Err::<(), _>(()) }) // Always fail + }, + ) + .await; + + assert!(result.is_err()); + assert_eq!(attempts.load(Ordering::SeqCst), 1); // Should only try once + } + + #[tokio::test] + async fn test_retry_with_throttling_retryable_error() { + let runtime = Tokio; + let policy = RetryPolicy { + max_retries: 2, + initial_delay_ms: 10, // Short delay for test + max_delay_ms: 100, + jitter_ms: 5, + }; + + let attempts = AtomicUsize::new(0); + + // Classifier that returns retryable + let classifier = |_: &()| RetryErrorType::Retryable; + + let result = retry_with_exponential_backoff_classified( + runtime, + policy, + classifier, + "test_operation", + || { + let attempt = attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async move { + if attempt < 1 { + Err::<&str, ()>(()) // Fail first attempt + } else { + Ok("success") // Succeed on retry + } + }) + }, + ) + .await; + + assert_eq!(result, Ok("success")); + assert_eq!(attempts.load(Ordering::SeqCst), 2); // Should try twice + } + + #[tokio::test] + async fn test_retry_with_throttling_throttled_error() { + let runtime = Tokio; + let policy = RetryPolicy { + max_retries: 2, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }; + + let attempts = AtomicUsize::new(0); + + // Classifier that returns throttled with short delay + let classifier = |_: &()| RetryErrorType::Throttled(Duration::from_millis(10)); + + let start_time = std::time::Instant::now(); + + let result = retry_with_exponential_backoff_classified( + runtime, + policy, + classifier, + "test_operation", + || { + let attempt = attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async move { + if attempt < 1 { + Err::<&str, ()>(()) // Fail first attempt (will be throttled) + } else { + Ok("success") // Succeed on retry + } + }) + }, + ) + .await; + + let elapsed = start_time.elapsed(); + + assert_eq!(result, Ok("success")); + assert_eq!(attempts.load(Ordering::SeqCst), 2); // Should try twice + assert!(elapsed >= Duration::from_millis(10)); // Should have waited for throttle delay + } + + #[tokio::test] + async fn test_retry_with_throttling_max_attempts_exceeded() { + let runtime = Tokio; + let policy = RetryPolicy { + max_retries: 1, // Only 1 retry + initial_delay_ms: 10, + max_delay_ms: 100, + jitter_ms: 5, + }; + + let attempts = AtomicUsize::new(0); + + // Classifier that returns retryable + let classifier = |_: &()| RetryErrorType::Retryable; + + let result = retry_with_exponential_backoff_classified( + runtime, + policy, + classifier, + "test_operation", + || { + attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async { Err::<(), _>(()) }) // Always fail + }, + ) + .await; + + assert!(result.is_err()); + assert_eq!(attempts.load(Ordering::SeqCst), 2); // Initial attempt + 1 retry + } + + #[tokio::test] + async fn test_retry_with_throttling_mixed_error_types() { + let runtime = Tokio; + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 10, + max_delay_ms: 100, + jitter_ms: 5, + }; + + let attempts = AtomicUsize::new(0); + + // Classifier that returns different types based on attempt number + let classifier = |err: &usize| match *err { + 0 => RetryErrorType::Retryable, + 1 => RetryErrorType::Throttled(Duration::from_millis(10)), + _ => RetryErrorType::Retryable, + }; + + let result = retry_with_exponential_backoff_classified( + runtime, + 
policy, + classifier, + "test_operation", + || { + let attempt = attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async move { + if attempt < 2 { + Err(attempt) // Return attempt number as error + } else { + Ok("success") // Succeed on third attempt + } + }) + }, + ) + .await; + + assert_eq!(result, Ok("success")); + assert_eq!(attempts.load(Ordering::SeqCst), 3); // Should try three times + } } From 30fdb4f464613e0e8bfb14245a0687d07206d487 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Tue, 12 Aug 2025 12:25:59 +0200 Subject: [PATCH 13/27] simplify retry interface --- opentelemetry-otlp/src/exporter/tonic/logs.rs | 4 +- .../src/exporter/tonic/metrics.rs | 4 +- .../src/exporter/tonic/trace.rs | 4 +- opentelemetry-sdk/src/retry.rs | 219 +++++++----------- 4 files changed, 93 insertions(+), 138 deletions(-) diff --git a/opentelemetry-otlp/src/exporter/tonic/logs.rs b/opentelemetry-otlp/src/exporter/tonic/logs.rs index 279ac72d84..fee1244686 100644 --- a/opentelemetry-otlp/src/exporter/tonic/logs.rs +++ b/opentelemetry-otlp/src/exporter/tonic/logs.rs @@ -15,7 +15,7 @@ use opentelemetry_proto::transform::logs::tonic::group_logs_by_resource_and_scop use super::BoxInterceptor; use crate::retry_classification::grpc::classify_tonic_status; -use opentelemetry_sdk::retry::{retry_with_exponential_backoff_classified, RetryPolicy}; +use opentelemetry_sdk::retry::{retry_with_backoff, RetryPolicy}; use opentelemetry_sdk::runtime::Tokio; pub(crate) struct TonicLogsClient { @@ -72,7 +72,7 @@ impl LogExporter for TonicLogsClient { let batch = Arc::new(batch); - match retry_with_exponential_backoff_classified( + match retry_with_backoff( Tokio, policy, classify_tonic_status, diff --git a/opentelemetry-otlp/src/exporter/tonic/metrics.rs b/opentelemetry-otlp/src/exporter/tonic/metrics.rs index 30a3db1ce0..d6244b4bae 100644 --- a/opentelemetry-otlp/src/exporter/tonic/metrics.rs +++ b/opentelemetry-otlp/src/exporter/tonic/metrics.rs @@ -13,7 +13,7 @@ use super::BoxInterceptor; use crate::metric::MetricsClient; use crate::retry_classification::grpc::classify_tonic_status; -use opentelemetry_sdk::retry::{retry_with_exponential_backoff_classified, RetryPolicy}; +use opentelemetry_sdk::retry::{retry_with_backoff, RetryPolicy}; use opentelemetry_sdk::runtime::Tokio; pub(crate) struct TonicMetricsClient { @@ -64,7 +64,7 @@ impl MetricsClient for TonicMetricsClient { jitter_ms: 100, }; - match retry_with_exponential_backoff_classified( + match retry_with_backoff( Tokio, policy, classify_tonic_status, diff --git a/opentelemetry-otlp/src/exporter/tonic/trace.rs b/opentelemetry-otlp/src/exporter/tonic/trace.rs index 5cc27ab188..a5239b4602 100644 --- a/opentelemetry-otlp/src/exporter/tonic/trace.rs +++ b/opentelemetry-otlp/src/exporter/tonic/trace.rs @@ -17,7 +17,7 @@ use tonic::{codegen::CompressionEncoding, service::Interceptor, transport::Chann use super::BoxInterceptor; use crate::retry_classification::grpc::classify_tonic_status; -use opentelemetry_sdk::retry::{retry_with_exponential_backoff_classified, RetryPolicy}; +use opentelemetry_sdk::retry::{retry_with_backoff, RetryPolicy}; use opentelemetry_sdk::runtime::Tokio; pub(crate) struct TonicTracesClient { @@ -74,7 +74,7 @@ impl SpanExporter for TonicTracesClient { let batch = Arc::new(batch); - match retry_with_exponential_backoff_classified( + match retry_with_backoff( Tokio, policy, classify_tonic_status, diff --git a/opentelemetry-sdk/src/retry.rs b/opentelemetry-sdk/src/retry.rs index a46e91f628..48f4cf49a7 100644 --- a/opentelemetry-sdk/src/retry.rs +++ 
b/opentelemetry-sdk/src/retry.rs @@ -3,10 +3,10 @@ //! The `RetryPolicy` struct defines the configuration for the retry behavior, including the maximum //! number of retries, initial delay, maximum delay, and jitter. //! -//! The `retry_with_exponential_backoff` function retries the given operation according to the +//! The `retry_with_backoff` function retries the given operation according to the //! specified retry policy, using exponential backoff and jitter to determine the delay between -//! retries. The function logs errors and retries the operation until it succeeds or the maximum -//! number of retries is reached. +//! retries. The function uses error classification to determine retry behavior and can honor +//! server-provided throttling hints. #[cfg(feature = "experimental_async_runtime")] use opentelemetry::otel_warn; @@ -69,45 +69,7 @@ fn generate_jitter(max_jitter: u64) -> u64 { nanos as u64 % (max_jitter + 1) } -/// Retries the given operation with exponential backoff and jitter. -/// -/// # Arguments -/// -/// * `runtime` - The async runtime to use for delays. -/// * `policy` - The retry policy configuration. -/// * `operation_name` - The name of the operation being retried. -/// * `operation` - The operation to be retried. -/// -/// # Returns -/// -/// A `Result` containing the operation's result or an error if the maximum retries are reached. -#[cfg(feature = "experimental_async_runtime")] -pub async fn retry_with_exponential_backoff( - runtime: R, - policy: RetryPolicy, - operation_name: &str, - operation: F, -) -> Result -where - R: Runtime, - F: FnMut() -> Fut, - E: std::fmt::Debug, - Fut: Future>, -{ - // Use a simple classifier that treats all errors as retryable - let simple_classifier = |_: &E| RetryErrorType::Retryable; - - retry_with_exponential_backoff_classified( - runtime, - policy, - simple_classifier, - operation_name, - operation, - ) - .await -} - -/// Enhanced retry with exponential backoff, jitter, and error classification. +/// Retries the given operation with exponential backoff, jitter, and error classification. /// /// This function provides sophisticated retry behavior by classifying errors /// and honoring server-provided throttling hints (e.g., HTTP Retry-After, gRPC RetryInfo). @@ -125,7 +87,7 @@ where /// A `Result` containing the operation's result or an error if max retries are reached /// or a non-retryable error occurs. #[cfg(feature = "experimental_async_runtime")] -pub async fn retry_with_exponential_backoff_classified( +pub async fn retry_with_backoff( runtime: R, policy: RetryPolicy, error_classifier: C, @@ -186,9 +148,10 @@ where /// No-op retry function for when experimental_async_runtime is not enabled. /// This function will execute the operation exactly once without any retries. 
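Because the stub keeps the same parameter list as the classified variant, call sites compile unchanged when the feature is off; roughly (with `policy`, `classify`, and `op` as in the sketch above):

    // With `experimental_async_runtime` disabled, this runs `op` exactly once and
    // returns its first result; the policy and classifier are accepted but ignored.
    let result = retry_with_backoff(NoOpRuntime, policy, classify, "Example.Export", op).await;
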
#[cfg(not(feature = "experimental_async_runtime"))] -pub async fn retry_with_exponential_backoff( +pub async fn retry_with_backoff( _runtime: R, _policy: RetryPolicy, + _error_classifier: C, _operation_name: &str, mut operation: F, ) -> Result @@ -227,9 +190,13 @@ mod tests { jitter_ms: 100, }; - let result = retry_with_exponential_backoff(runtime, policy, "test_operation", || { - Box::pin(async { Ok::<_, ()>("success") }) - }) + let result = retry_with_backoff( + runtime, + policy, + |_: &()| RetryErrorType::Retryable, + "test_operation", + || Box::pin(async { Ok::<_, ()>("success") }), + ) .await; assert_eq!(result, Ok("success")); @@ -248,16 +215,22 @@ mod tests { let attempts = AtomicUsize::new(0); - let result = retry_with_exponential_backoff(runtime, policy, "test_operation", || { - let attempt = attempts.fetch_add(1, Ordering::SeqCst); - Box::pin(async move { - if attempt < 2 { - Err::<&str, &str>("error") // Fail the first two attempts - } else { - Ok::<&str, &str>("success") // Succeed on the third attempt - } - }) - }) + let result = retry_with_backoff( + runtime, + policy, + |_: &&str| RetryErrorType::Retryable, + "test_operation", + || { + let attempt = attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async move { + if attempt < 2 { + Err::<&str, &str>("error") // Fail the first two attempts + } else { + Ok::<&str, &str>("success") // Succeed on the third attempt + } + }) + }, + ) .await; assert_eq!(result, Ok("success")); @@ -277,10 +250,16 @@ mod tests { let attempts = AtomicUsize::new(0); - let result = retry_with_exponential_backoff(runtime, policy, "test_operation", || { - attempts.fetch_add(1, Ordering::SeqCst); - Box::pin(async { Err::<(), _>("error") }) // Always fail - }) + let result = retry_with_backoff( + runtime, + policy, + |_: &&str| RetryErrorType::Retryable, + "test_operation", + || { + attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async { Err::<(), _>("error") }) // Always fail + }, + ) .await; assert_eq!(result, Err("error")); @@ -300,9 +279,15 @@ mod tests { let result = timeout( Duration::from_secs(1), - retry_with_exponential_backoff(runtime, policy, "test_operation", || { - Box::pin(async { Err::<(), _>("error") }) // Always fail - }), + retry_with_backoff( + runtime, + policy, + |_: &&str| RetryErrorType::Retryable, + "test_operation", + || { + Box::pin(async { Err::<(), _>("error") }) // Always fail + }, + ), ) .await; @@ -337,16 +322,10 @@ mod tests { // Classifier that returns non-retryable let classifier = |_: &()| RetryErrorType::NonRetryable; - let result = retry_with_exponential_backoff_classified( - runtime, - policy, - classifier, - "test_operation", - || { - attempts.fetch_add(1, Ordering::SeqCst); - Box::pin(async { Err::<(), _>(()) }) // Always fail - }, - ) + let result = retry_with_backoff(runtime, policy, classifier, "test_operation", || { + attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async { Err::<(), _>(()) }) // Always fail + }) .await; assert!(result.is_err()); @@ -368,22 +347,16 @@ mod tests { // Classifier that returns retryable let classifier = |_: &()| RetryErrorType::Retryable; - let result = retry_with_exponential_backoff_classified( - runtime, - policy, - classifier, - "test_operation", - || { - let attempt = attempts.fetch_add(1, Ordering::SeqCst); - Box::pin(async move { - if attempt < 1 { - Err::<&str, ()>(()) // Fail first attempt - } else { - Ok("success") // Succeed on retry - } - }) - }, - ) + let result = retry_with_backoff(runtime, policy, classifier, "test_operation", || { + let attempt = 
attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async move { + if attempt < 1 { + Err::<&str, ()>(()) // Fail first attempt + } else { + Ok("success") // Succeed on retry + } + }) + }) .await; assert_eq!(result, Ok("success")); @@ -407,22 +380,16 @@ mod tests { let start_time = std::time::Instant::now(); - let result = retry_with_exponential_backoff_classified( - runtime, - policy, - classifier, - "test_operation", - || { - let attempt = attempts.fetch_add(1, Ordering::SeqCst); - Box::pin(async move { - if attempt < 1 { - Err::<&str, ()>(()) // Fail first attempt (will be throttled) - } else { - Ok("success") // Succeed on retry - } - }) - }, - ) + let result = retry_with_backoff(runtime, policy, classifier, "test_operation", || { + let attempt = attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async move { + if attempt < 1 { + Err::<&str, ()>(()) // Fail first attempt (will be throttled) + } else { + Ok("success") // Succeed on retry + } + }) + }) .await; let elapsed = start_time.elapsed(); @@ -447,16 +414,10 @@ mod tests { // Classifier that returns retryable let classifier = |_: &()| RetryErrorType::Retryable; - let result = retry_with_exponential_backoff_classified( - runtime, - policy, - classifier, - "test_operation", - || { - attempts.fetch_add(1, Ordering::SeqCst); - Box::pin(async { Err::<(), _>(()) }) // Always fail - }, - ) + let result = retry_with_backoff(runtime, policy, classifier, "test_operation", || { + attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async { Err::<(), _>(()) }) // Always fail + }) .await; assert!(result.is_err()); @@ -482,22 +443,16 @@ mod tests { _ => RetryErrorType::Retryable, }; - let result = retry_with_exponential_backoff_classified( - runtime, - policy, - classifier, - "test_operation", - || { - let attempt = attempts.fetch_add(1, Ordering::SeqCst); - Box::pin(async move { - if attempt < 2 { - Err(attempt) // Return attempt number as error - } else { - Ok("success") // Succeed on third attempt - } - }) - }, - ) + let result = retry_with_backoff(runtime, policy, classifier, "test_operation", || { + let attempt = attempts.fetch_add(1, Ordering::SeqCst); + Box::pin(async move { + if attempt < 2 { + Err(attempt) // Return attempt number as error + } else { + Ok("success") // Succeed on third attempt + } + }) + }) .await; assert_eq!(result, Ok("success")); From 61766de1a23b85dd70a0845ca26aa3da85913051 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Tue, 12 Aug 2025 13:10:07 +0200 Subject: [PATCH 14/27] Implement retry for http trace --- opentelemetry-otlp/Cargo.toml | 5 + opentelemetry-otlp/src/exporter/http/trace.rs | 131 ++++++++++++++++++ opentelemetry-otlp/src/lib.rs | 2 +- .../src/retry_classification.rs | 9 +- 4 files changed, 142 insertions(+), 5 deletions(-) diff --git a/opentelemetry-otlp/Cargo.toml b/opentelemetry-otlp/Cargo.toml index 4c70fe1205..10939cac5f 100644 --- a/opentelemetry-otlp/Cargo.toml +++ b/opentelemetry-otlp/Cargo.toml @@ -83,6 +83,11 @@ tls-webpki-roots = ["tls", "tonic/tls-webpki-roots"] # http binary http-proto = ["prost", "opentelemetry-http", "opentelemetry-proto/gen-tonic-messages", "http", "trace", "metrics"] + +# http with retry support. +# What should we do with this? We need the async_runtime. gRPC exporters already need it. 
+http-retry = ["opentelemetry_sdk/experimental_async_runtime", "opentelemetry_sdk/rt-tokio", "tokio"] + http-json = ["serde_json", "prost", "opentelemetry-http", "opentelemetry-proto/gen-tonic-messages", "opentelemetry-proto/with-serde", "http", "trace", "metrics"] reqwest-blocking-client = ["reqwest/blocking", "opentelemetry-http/reqwest-blocking"] reqwest-client = ["reqwest", "opentelemetry-http/reqwest"] diff --git a/opentelemetry-otlp/src/exporter/http/trace.rs b/opentelemetry-otlp/src/exporter/http/trace.rs index 28bc7fc5fb..b94d74235d 100644 --- a/opentelemetry-otlp/src/exporter/http/trace.rs +++ b/opentelemetry-otlp/src/exporter/http/trace.rs @@ -8,7 +8,138 @@ use opentelemetry_sdk::{ trace::{SpanData, SpanExporter}, }; +#[cfg(feature = "http-retry")] +use crate::retry_classification::http::classify_http_error; +#[cfg(feature = "http-retry")] +use opentelemetry_sdk::retry::{retry_with_backoff, RetryErrorType, RetryPolicy}; +#[cfg(feature = "http-retry")] +use opentelemetry_sdk::runtime::Tokio; + +#[cfg(feature = "http-retry")] +/// HTTP-specific error wrapper for retry classification +#[derive(Debug)] +struct HttpExportError { + status_code: u16, + retry_after: Option, + message: String, +} + +#[cfg(feature = "http-retry")] +/// Classify HTTP export errors for retry decisions +fn classify_http_export_error(error: &HttpExportError) -> RetryErrorType { + classify_http_error(error.status_code, error.retry_after.as_deref()) +} + impl SpanExporter for OtlpHttpClient { + #[cfg(feature = "http-retry")] + async fn export(&self, batch: Vec) -> OTelSdkResult { + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }; + + let batch = Arc::new(batch); + + retry_with_backoff( + Tokio, + policy, + classify_http_export_error, + "HttpTracesClient.Export", + || async { + let batch_clone = Arc::clone(&batch); + + // Get client + let client = self + .client + .lock() + .map_err(|e| HttpExportError { + status_code: 500, + retry_after: None, + message: format!("Mutex lock failed: {e}"), + })? + .as_ref() + .ok_or_else(|| HttpExportError { + status_code: 500, + retry_after: None, + message: "Exporter already shutdown".to_string(), + })? 
+ .clone(); + + // Build request body + let (body, content_type, content_encoding) = self + .build_trace_export_body((*batch_clone).clone()) + .map_err(|e| HttpExportError { + status_code: 400, + retry_after: None, + message: format!("Failed to build request body: {e}"), + })?; + + // Build HTTP request + let mut request_builder = http::Request::builder() + .method(Method::POST) + .uri(&self.collector_endpoint) + .header(CONTENT_TYPE, content_type); + + if let Some(encoding) = content_encoding { + request_builder = request_builder.header("Content-Encoding", encoding); + } + + let mut request = + request_builder + .body(body.into()) + .map_err(|e| HttpExportError { + status_code: 400, + retry_after: None, + message: format!("Failed to build HTTP request: {e}"), + })?; + + for (k, v) in &self.headers { + request.headers_mut().insert(k.clone(), v.clone()); + } + + let request_uri = request.uri().to_string(); + otel_debug!(name: "HttpTracesClient.ExportStarted"); + + // Send request + let response = client.send_bytes(request).await.map_err(|e| { + HttpExportError { + status_code: 0, // Network error + retry_after: None, + message: format!("Network error: {e:?}"), + } + })?; + + let status_code = response.status().as_u16(); + let retry_after = response + .headers() + .get("retry-after") + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + + if !response.status().is_success() { + return Err(HttpExportError { + status_code, + retry_after, + message: format!( + "HTTP export failed. Url: {}, Status: {}, Response: {:?}", + request_uri, + status_code, + response.body() + ), + }); + } + + otel_debug!(name: "HttpTracesClient.ExportSucceeded"); + Ok(()) + }, + ) + .await + .map_err(|e| OTelSdkError::InternalFailure(e.message)) + } + + #[cfg(not(feature = "http-retry"))] async fn export(&self, batch: Vec) -> OTelSdkResult { let client = match self .client diff --git a/opentelemetry-otlp/src/lib.rs b/opentelemetry-otlp/src/lib.rs index 9e21ffe6b4..e8de66d273 100644 --- a/opentelemetry-otlp/src/lib.rs +++ b/opentelemetry-otlp/src/lib.rs @@ -366,7 +366,7 @@ mod metric; #[cfg(any(feature = "http-proto", feature = "http-json", feature = "grpc-tonic"))] mod span; -#[cfg(feature = "grpc-tonic")] +#[cfg(any(feature = "grpc-tonic", feature = "http-retry"))] pub mod retry_classification; pub use crate::exporter::Compression; diff --git a/opentelemetry-otlp/src/retry_classification.rs b/opentelemetry-otlp/src/retry_classification.rs index 63a395bc3e..06a8d914ad 100644 --- a/opentelemetry-otlp/src/retry_classification.rs +++ b/opentelemetry-otlp/src/retry_classification.rs @@ -60,14 +60,14 @@ pub mod http { fn parse_retry_after(retry_after: &str) -> Option { // Try parsing as seconds first if let Ok(seconds) = retry_after.trim().parse::() { - // Cap at 10 minutes for safety + // Cap at 10 minutes. TODO - what's sensible here? let capped_seconds = seconds.min(600); return Some(Duration::from_secs(capped_seconds)); } // Try parsing as HTTP date if let Ok(delay_seconds) = parse_http_date_to_delay(retry_after) { - // Cap at 10 minutes for safety + // Cap at 10 minutes. TODO - what's sensible here? let capped_seconds = delay_seconds.min(600); return Some(Duration::from_secs(capped_seconds)); } @@ -78,7 +78,7 @@ pub mod http { /// Parses HTTP date format and returns delay in seconds from now. /// /// This is a simplified parser for the most common HTTP date format. - /// In production, you might want to use a proper HTTP date parsing library. + /// TODO - should we use a library here? 
fn parse_http_date_to_delay(date_str: &str) -> Result { // For now, return error - would need proper HTTP date parsing // This could be implemented with chrono or similar @@ -88,6 +88,7 @@ pub mod http { } /// gRPC-specific error classification with RetryInfo support. +#[cfg(feature = "grpc-tonic")] pub mod grpc { use super::*; @@ -122,7 +123,7 @@ pub mod grpc { tonic::Code::ResourceExhausted => { if let Some(seconds) = retry_info_seconds { // Server signals recovery is possible - use throttled retry - let capped_seconds = seconds.min(600); // Cap at 10 minutes for safety + let capped_seconds = seconds.min(600); // Cap at 10 minutes. TODO - what's sensible here? return RetryErrorType::Throttled(std::time::Duration::from_secs( capped_seconds, )); From 2371f39ffd273ba9693f5a6a8c6608dd95f9dd60 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Tue, 12 Aug 2025 13:30:03 +0200 Subject: [PATCH 15/27] Implement retry for http metrics and logs --- opentelemetry-otlp/src/exporter/http/logs.rs | 115 ++++++++++++++++++ .../src/exporter/http/metrics.rs | 114 +++++++++++++++++ opentelemetry-otlp/src/exporter/http/mod.rs | 31 +++++ opentelemetry-otlp/src/exporter/http/trace.rs | 61 ++++------ 4 files changed, 282 insertions(+), 39 deletions(-) diff --git a/opentelemetry-otlp/src/exporter/http/logs.rs b/opentelemetry-otlp/src/exporter/http/logs.rs index 3a66266652..cc61a10474 100644 --- a/opentelemetry-otlp/src/exporter/http/logs.rs +++ b/opentelemetry-otlp/src/exporter/http/logs.rs @@ -4,9 +4,124 @@ use http::{header::CONTENT_TYPE, Method}; use opentelemetry::otel_debug; use opentelemetry_sdk::error::{OTelSdkError, OTelSdkResult}; use opentelemetry_sdk::logs::{LogBatch, LogExporter}; +#[cfg(feature = "http-retry")] +use std::sync::Arc; use std::time; +#[cfg(feature = "http-retry")] +use super::{classify_http_export_error, HttpExportError, HttpRetryData}; +#[cfg(feature = "http-retry")] +use opentelemetry_sdk::retry::{retry_with_backoff, RetryPolicy}; +#[cfg(feature = "http-retry")] +use opentelemetry_sdk::runtime::Tokio; + impl LogExporter for OtlpHttpClient { + #[cfg(feature = "http-retry")] + async fn export(&self, batch: LogBatch<'_>) -> OTelSdkResult { + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }; + + // Build request body once before retry loop since LogBatch contains borrowed data + let (body, content_type, content_encoding) = self + .build_logs_export_body(batch) + .map_err(OTelSdkError::InternalFailure)?; + + let retry_data = Arc::new(HttpRetryData { + body, + headers: self.headers.clone(), + endpoint: self.collector_endpoint.to_string(), + }); + + retry_with_backoff( + Tokio, + policy, + classify_http_export_error, + "HttpLogsClient.Export", + || async { + // Get client + let client = self + .client + .lock() + .map_err(|e| HttpExportError { + status_code: 500, + retry_after: None, + message: format!("Mutex lock failed: {e}"), + })? + .as_ref() + .ok_or_else(|| HttpExportError { + status_code: 500, + retry_after: None, + message: "Exporter already shutdown".to_string(), + })? 
+ .clone(); + + // Build HTTP request + let mut request_builder = http::Request::builder() + .method(Method::POST) + .uri(&retry_data.endpoint) + .header(CONTENT_TYPE, content_type); + + if let Some(encoding) = content_encoding { + request_builder = request_builder.header(CONTENT_ENCODING, encoding); + } + + let mut request = request_builder + .body(retry_data.body.clone().into()) + .map_err(|e| HttpExportError { + status_code: 400, + retry_after: None, + message: format!("Failed to build HTTP request: {e}"), + })?; + + for (k, v) in &retry_data.headers { + request.headers_mut().insert(k.clone(), v.clone()); + } + + let request_uri = request.uri().to_string(); + otel_debug!(name: "HttpLogsClient.ExportStarted"); + + // Send request + let response = client.send_bytes(request).await.map_err(|e| { + HttpExportError { + status_code: 0, // Network error + retry_after: None, + message: format!("Network error: {e:?}"), + } + })?; + + let status_code = response.status().as_u16(); + let retry_after = response + .headers() + .get("retry-after") + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + + if !response.status().is_success() { + return Err(HttpExportError { + status_code, + retry_after, + message: format!( + "HTTP export failed. Url: {}, Status: {}, Response: {:?}", + request_uri, + status_code, + response.body() + ), + }); + } + + otel_debug!(name: "HttpLogsClient.ExportSucceeded"); + Ok(()) + }, + ) + .await + .map_err(|e| OTelSdkError::InternalFailure(e.message)) + } + + #[cfg(not(feature = "http-retry"))] async fn export(&self, batch: LogBatch<'_>) -> OTelSdkResult { let client = self .client diff --git a/opentelemetry-otlp/src/exporter/http/metrics.rs b/opentelemetry-otlp/src/exporter/http/metrics.rs index 11a3041585..b022754486 100644 --- a/opentelemetry-otlp/src/exporter/http/metrics.rs +++ b/opentelemetry-otlp/src/exporter/http/metrics.rs @@ -8,7 +8,121 @@ use opentelemetry_sdk::metrics::data::ResourceMetrics; use super::OtlpHttpClient; +#[cfg(feature = "http-retry")] +use super::{classify_http_export_error, HttpExportError, HttpRetryData}; +#[cfg(feature = "http-retry")] +use opentelemetry_sdk::retry::{retry_with_backoff, RetryPolicy}; +#[cfg(feature = "http-retry")] +use opentelemetry_sdk::runtime::Tokio; + impl MetricsClient for OtlpHttpClient { + #[cfg(feature = "http-retry")] + async fn export(&self, metrics: &ResourceMetrics) -> OTelSdkResult { + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }; + + // Build request body once before retry loop + let (body, content_type, content_encoding) = + self.build_metrics_export_body(metrics).ok_or_else(|| { + OTelSdkError::InternalFailure("Failed to serialize metrics".to_string()) + })?; + + let retry_data = Arc::new(HttpRetryData { + body, + headers: self.headers.clone(), + endpoint: self.collector_endpoint.to_string(), + }); + + retry_with_backoff( + Tokio, + policy, + classify_http_export_error, + "HttpMetricsClient.Export", + || async { + // Get client + let client = self + .client + .lock() + .map_err(|e| HttpExportError { + status_code: 500, + retry_after: None, + message: format!("Mutex lock failed: {e}"), + })? + .as_ref() + .ok_or_else(|| HttpExportError { + status_code: 500, + retry_after: None, + message: "Exporter already shutdown".to_string(), + })? 
+ .clone(); + + // Build HTTP request + let mut request_builder = http::Request::builder() + .method(Method::POST) + .uri(&retry_data.endpoint) + .header(CONTENT_TYPE, content_type); + + if let Some(encoding) = content_encoding { + request_builder = request_builder.header("Content-Encoding", encoding); + } + + let mut request = request_builder + .body(retry_data.body.clone().into()) + .map_err(|e| HttpExportError { + status_code: 400, + retry_after: None, + message: format!("Failed to build HTTP request: {e}"), + })?; + + for (k, v) in &retry_data.headers { + request.headers_mut().insert(k.clone(), v.clone()); + } + + let request_uri = request.uri().to_string(); + otel_debug!(name: "HttpMetricsClient.ExportStarted"); + + // Send request + let response = client.send_bytes(request).await.map_err(|e| { + HttpExportError { + status_code: 0, // Network error + retry_after: None, + message: format!("Network error: {e:?}"), + } + })?; + + let status_code = response.status().as_u16(); + let retry_after = response + .headers() + .get("retry-after") + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + + if !response.status().is_success() { + return Err(HttpExportError { + status_code, + retry_after, + message: format!( + "HTTP export failed. Url: {}, Status: {}, Response: {:?}", + request_uri, + status_code, + response.body() + ), + }); + } + + otel_debug!(name: "HttpMetricsClient.ExportSucceeded"); + Ok(()) + }, + ) + .await + .map_err(|e| OTelSdkError::InternalFailure(e.message)) + } + + #[cfg(not(feature = "http-retry"))] async fn export(&self, metrics: &ResourceMetrics) -> OTelSdkResult { let client = self .client diff --git a/opentelemetry-otlp/src/exporter/http/mod.rs b/opentelemetry-otlp/src/exporter/http/mod.rs index 5cb25ea37e..e51c950432 100644 --- a/opentelemetry-otlp/src/exporter/http/mod.rs +++ b/opentelemetry-otlp/src/exporter/http/mod.rs @@ -22,6 +22,37 @@ use std::str::FromStr; use std::sync::{Arc, Mutex}; use std::time::Duration; +#[cfg(feature = "http-retry")] +use crate::retry_classification::http::classify_http_error; +#[cfg(feature = "http-retry")] +use opentelemetry_sdk::retry::RetryErrorType; + +// Shared HTTP retry functionality +#[cfg(feature = "http-retry")] +/// HTTP-specific error wrapper for retry classification +#[derive(Debug)] +pub(crate) struct HttpExportError { + pub status_code: u16, + pub retry_after: Option, + pub message: String, +} + +#[cfg(feature = "http-retry")] +/// Classify HTTP export errors for retry decisions +pub(crate) fn classify_http_export_error(error: &HttpExportError) -> RetryErrorType { + classify_http_error(error.status_code, error.retry_after.as_deref()) +} + +#[cfg(feature = "http-retry")] +/// Shared HTTP request data for retry attempts - optimizes Arc usage by bundling all data +/// we need to pass into the retry handler +#[derive(Debug)] +pub(crate) struct HttpRetryData { + pub body: Vec, + pub headers: HashMap, + pub endpoint: String, +} + #[cfg(feature = "metrics")] mod metrics; diff --git a/opentelemetry-otlp/src/exporter/http/trace.rs b/opentelemetry-otlp/src/exporter/http/trace.rs index b94d74235d..c55d4f4609 100644 --- a/opentelemetry-otlp/src/exporter/http/trace.rs +++ b/opentelemetry-otlp/src/exporter/http/trace.rs @@ -9,27 +9,12 @@ use opentelemetry_sdk::{ }; #[cfg(feature = "http-retry")] -use crate::retry_classification::http::classify_http_error; +use super::{classify_http_export_error, HttpExportError, HttpRetryData}; #[cfg(feature = "http-retry")] -use opentelemetry_sdk::retry::{retry_with_backoff, RetryErrorType, 
RetryPolicy}; +use opentelemetry_sdk::retry::{retry_with_backoff, RetryPolicy}; #[cfg(feature = "http-retry")] use opentelemetry_sdk::runtime::Tokio; -#[cfg(feature = "http-retry")] -/// HTTP-specific error wrapper for retry classification -#[derive(Debug)] -struct HttpExportError { - status_code: u16, - retry_after: Option, - message: String, -} - -#[cfg(feature = "http-retry")] -/// Classify HTTP export errors for retry decisions -fn classify_http_export_error(error: &HttpExportError) -> RetryErrorType { - classify_http_error(error.status_code, error.retry_after.as_deref()) -} - impl SpanExporter for OtlpHttpClient { #[cfg(feature = "http-retry")] async fn export(&self, batch: Vec) -> OTelSdkResult { @@ -40,7 +25,17 @@ impl SpanExporter for OtlpHttpClient { jitter_ms: 100, }; - let batch = Arc::new(batch); + // Build request body once before retry loop + let (body, content_type, content_encoding) = + self.build_trace_export_body(batch).map_err(|e| { + OTelSdkError::InternalFailure(format!("Failed to build request body: {e}")) + })?; + + let retry_data = Arc::new(HttpRetryData { + body, + headers: self.headers.clone(), + endpoint: self.collector_endpoint.to_string(), + }); retry_with_backoff( Tokio, @@ -48,8 +43,6 @@ impl SpanExporter for OtlpHttpClient { classify_http_export_error, "HttpTracesClient.Export", || async { - let batch_clone = Arc::clone(&batch); - // Get client let client = self .client @@ -67,35 +60,25 @@ impl SpanExporter for OtlpHttpClient { })? .clone(); - // Build request body - let (body, content_type, content_encoding) = self - .build_trace_export_body((*batch_clone).clone()) - .map_err(|e| HttpExportError { - status_code: 400, - retry_after: None, - message: format!("Failed to build request body: {e}"), - })?; - // Build HTTP request let mut request_builder = http::Request::builder() .method(Method::POST) - .uri(&self.collector_endpoint) + .uri(&retry_data.endpoint) .header(CONTENT_TYPE, content_type); if let Some(encoding) = content_encoding { request_builder = request_builder.header("Content-Encoding", encoding); } - let mut request = - request_builder - .body(body.into()) - .map_err(|e| HttpExportError { - status_code: 400, - retry_after: None, - message: format!("Failed to build HTTP request: {e}"), - })?; + let mut request = request_builder + .body(retry_data.body.clone().into()) + .map_err(|e| HttpExportError { + status_code: 400, + retry_after: None, + message: format!("Failed to build HTTP request: {e}"), + })?; - for (k, v) in &self.headers { + for (k, v) in &retry_data.headers { request.headers_mut().insert(k.clone(), v.clone()); } From c4cc2138ff5d221a66224ff8d24f2886c4b8cfd6 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Tue, 12 Aug 2025 13:39:07 +0200 Subject: [PATCH 16/27] changelog --- opentelemetry-otlp/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/opentelemetry-otlp/CHANGELOG.md b/opentelemetry-otlp/CHANGELOG.md index 45a2196ffc..1e5fb0ec75 100644 --- a/opentelemetry-otlp/CHANGELOG.md +++ b/opentelemetry-otlp/CHANGELOG.md @@ -8,6 +8,7 @@ Released 2025-Sep-25 - Update `opentelemetry-proto` and `opentelemetry-http` dependency version to 0.31.0 - Add HTTP compression support with `gzip-http` and `zstd-http` feature flags +- Add retry with exponential backoff and throttling support for HTTP and gRPC exporters ## 0.30.0 From 1b68212cb30293a5334fe61c57e866808c6b97ea Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Tue, 2 Sep 2025 10:59:45 +0200 Subject: [PATCH 17/27] chore: wrap headers in arc to save cloning per batch --- 
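Commentary (not part of the patch): the motivation for this change is that the shared header map no longer has to be deep-copied for every export or retry attempt; each attempt just clones the Arc handle. A minimal sketch of the idea using std types only — the exporter's real map uses http::HeaderName/HeaderValue, and String keys here are purely illustrative:

use std::collections::HashMap;
use std::sync::Arc;

fn main() {
    // Build the header map once, as OtlpHttpClient::new now does.
    let headers: Arc<HashMap<String, String>> = Arc::new(HashMap::from([(
        "authorization".to_string(),
        "Bearer token".to_string(),
    )]));

    // Each export / retry attempt clones only the Arc (a refcount bump),
    // not the underlying map of headers.
    let per_attempt = Arc::clone(&headers);
    assert_eq!(per_attempt.len(), headers.len());
}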
opentelemetry-otlp/src/exporter/http/logs.rs | 4 ++-- opentelemetry-otlp/src/exporter/http/metrics.rs | 4 ++-- opentelemetry-otlp/src/exporter/http/mod.rs | 6 +++--- opentelemetry-otlp/src/exporter/http/trace.rs | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/opentelemetry-otlp/src/exporter/http/logs.rs b/opentelemetry-otlp/src/exporter/http/logs.rs index cc61a10474..c5c63299c5 100644 --- a/opentelemetry-otlp/src/exporter/http/logs.rs +++ b/opentelemetry-otlp/src/exporter/http/logs.rs @@ -77,7 +77,7 @@ impl LogExporter for OtlpHttpClient { message: format!("Failed to build HTTP request: {e}"), })?; - for (k, v) in &retry_data.headers { + for (k, v) in retry_data.headers.iter() { request.headers_mut().insert(k.clone(), v.clone()); } @@ -147,7 +147,7 @@ impl LogExporter for OtlpHttpClient { .body(body.into()) .map_err(|e| OTelSdkError::InternalFailure(e.to_string()))?; - for (k, v) in &self.headers { + for (k, v) in self.headers.iter() { request.headers_mut().insert(k.clone(), v.clone()); } diff --git a/opentelemetry-otlp/src/exporter/http/metrics.rs b/opentelemetry-otlp/src/exporter/http/metrics.rs index b022754486..f3598f4f52 100644 --- a/opentelemetry-otlp/src/exporter/http/metrics.rs +++ b/opentelemetry-otlp/src/exporter/http/metrics.rs @@ -78,7 +78,7 @@ impl MetricsClient for OtlpHttpClient { message: format!("Failed to build HTTP request: {e}"), })?; - for (k, v) in &retry_data.headers { + for (k, v) in retry_data.headers.iter() { request.headers_mut().insert(k.clone(), v.clone()); } @@ -151,7 +151,7 @@ impl MetricsClient for OtlpHttpClient { .body(body.into()) .map_err(|e| OTelSdkError::InternalFailure(format!("{e:?}")))?; - for (k, v) in &self.headers { + for (k, v) in self.headers.iter() { request.headers_mut().insert(k.clone(), v.clone()); } diff --git a/opentelemetry-otlp/src/exporter/http/mod.rs b/opentelemetry-otlp/src/exporter/http/mod.rs index e51c950432..a21a04570b 100644 --- a/opentelemetry-otlp/src/exporter/http/mod.rs +++ b/opentelemetry-otlp/src/exporter/http/mod.rs @@ -49,7 +49,7 @@ pub(crate) fn classify_http_export_error(error: &HttpExportError) -> RetryErrorT #[derive(Debug)] pub(crate) struct HttpRetryData { pub body: Vec, - pub headers: HashMap, + pub headers: Arc>, pub endpoint: String, } @@ -329,7 +329,7 @@ impl HttpExporterBuilder { pub(crate) struct OtlpHttpClient { client: Mutex>>, collector_endpoint: Uri, - headers: HashMap, + headers: Arc>, protocol: Protocol, _timeout: Duration, compression: Option, @@ -383,7 +383,7 @@ impl OtlpHttpClient { OtlpHttpClient { client: Mutex::new(Some(client)), collector_endpoint, - headers, + headers: Arc::new(headers), protocol, _timeout: timeout, compression, diff --git a/opentelemetry-otlp/src/exporter/http/trace.rs b/opentelemetry-otlp/src/exporter/http/trace.rs index c55d4f4609..feaada955b 100644 --- a/opentelemetry-otlp/src/exporter/http/trace.rs +++ b/opentelemetry-otlp/src/exporter/http/trace.rs @@ -78,7 +78,7 @@ impl SpanExporter for OtlpHttpClient { message: format!("Failed to build HTTP request: {e}"), })?; - for (k, v) in &retry_data.headers { + for (k, v) in retry_data.headers.iter() { request.headers_mut().insert(k.clone(), v.clone()); } @@ -155,7 +155,7 @@ impl SpanExporter for OtlpHttpClient { Err(e) => return Err(OTelSdkError::InternalFailure(e.to_string())), }; - for (k, v) in &self.headers { + for (k, v) in self.headers.iter() { request.headers_mut().insert(k.clone(), v.clone()); } From 2d97decf1c9b1e07eadce7a825362ba204356ebf Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Tue, 2 
Sep 2025 12:09:27 +0200 Subject: [PATCH 18/27] chore: simplify HttpExportError creation --- opentelemetry-otlp/src/exporter/http/logs.rs | 42 +++++++------------ .../src/exporter/http/metrics.rs | 42 +++++++------------ opentelemetry-otlp/src/exporter/http/mod.rs | 21 ++++++++++ opentelemetry-otlp/src/exporter/http/trace.rs | 42 +++++++------------ 4 files changed, 69 insertions(+), 78 deletions(-) diff --git a/opentelemetry-otlp/src/exporter/http/logs.rs b/opentelemetry-otlp/src/exporter/http/logs.rs index c5c63299c5..a1e584337e 100644 --- a/opentelemetry-otlp/src/exporter/http/logs.rs +++ b/opentelemetry-otlp/src/exporter/http/logs.rs @@ -46,16 +46,10 @@ impl LogExporter for OtlpHttpClient { let client = self .client .lock() - .map_err(|e| HttpExportError { - status_code: 500, - retry_after: None, - message: format!("Mutex lock failed: {e}"), - })? + .map_err(|e| HttpExportError::new(500, format!("Mutex lock failed: {e}")))? .as_ref() - .ok_or_else(|| HttpExportError { - status_code: 500, - retry_after: None, - message: "Exporter already shutdown".to_string(), + .ok_or_else(|| { + HttpExportError::new(500, "Exporter already shutdown".to_string()) })? .clone(); @@ -71,10 +65,8 @@ impl LogExporter for OtlpHttpClient { let mut request = request_builder .body(retry_data.body.clone().into()) - .map_err(|e| HttpExportError { - status_code: 400, - retry_after: None, - message: format!("Failed to build HTTP request: {e}"), + .map_err(|e| { + HttpExportError::new(400, format!("Failed to build HTTP request: {e}")) })?; for (k, v) in retry_data.headers.iter() { @@ -86,11 +78,7 @@ impl LogExporter for OtlpHttpClient { // Send request let response = client.send_bytes(request).await.map_err(|e| { - HttpExportError { - status_code: 0, // Network error - retry_after: None, - message: format!("Network error: {e:?}"), - } + HttpExportError::new(0, format!("Network error: {e:?}")) // Network error })?; let status_code = response.status().as_u16(); @@ -101,15 +89,17 @@ impl LogExporter for OtlpHttpClient { .map(|s| s.to_string()); if !response.status().is_success() { - return Err(HttpExportError { + let message = format!( + "HTTP export failed. Url: {}, Status: {}, Response: {:?}", + request_uri, status_code, - retry_after, - message: format!( - "HTTP export failed. Url: {}, Status: {}, Response: {:?}", - request_uri, - status_code, - response.body() - ), + response.body() + ); + return Err(match retry_after { + Some(retry_after) => { + HttpExportError::with_retry_after(status_code, retry_after, message) + } + None => HttpExportError::new(status_code, message), }); } diff --git a/opentelemetry-otlp/src/exporter/http/metrics.rs b/opentelemetry-otlp/src/exporter/http/metrics.rs index f3598f4f52..0f517b3839 100644 --- a/opentelemetry-otlp/src/exporter/http/metrics.rs +++ b/opentelemetry-otlp/src/exporter/http/metrics.rs @@ -47,16 +47,10 @@ impl MetricsClient for OtlpHttpClient { let client = self .client .lock() - .map_err(|e| HttpExportError { - status_code: 500, - retry_after: None, - message: format!("Mutex lock failed: {e}"), - })? + .map_err(|e| HttpExportError::new(500, format!("Mutex lock failed: {e}")))? .as_ref() - .ok_or_else(|| HttpExportError { - status_code: 500, - retry_after: None, - message: "Exporter already shutdown".to_string(), + .ok_or_else(|| { + HttpExportError::new(500, "Exporter already shutdown".to_string()) })? 
.clone(); @@ -72,10 +66,8 @@ impl MetricsClient for OtlpHttpClient { let mut request = request_builder .body(retry_data.body.clone().into()) - .map_err(|e| HttpExportError { - status_code: 400, - retry_after: None, - message: format!("Failed to build HTTP request: {e}"), + .map_err(|e| { + HttpExportError::new(400, format!("Failed to build HTTP request: {e}")) })?; for (k, v) in retry_data.headers.iter() { @@ -87,11 +79,7 @@ impl MetricsClient for OtlpHttpClient { // Send request let response = client.send_bytes(request).await.map_err(|e| { - HttpExportError { - status_code: 0, // Network error - retry_after: None, - message: format!("Network error: {e:?}"), - } + HttpExportError::new(0, format!("Network error: {e:?}")) // Network error })?; let status_code = response.status().as_u16(); @@ -102,15 +90,17 @@ impl MetricsClient for OtlpHttpClient { .map(|s| s.to_string()); if !response.status().is_success() { - return Err(HttpExportError { + let message = format!( + "HTTP export failed. Url: {}, Status: {}, Response: {:?}", + request_uri, status_code, - retry_after, - message: format!( - "HTTP export failed. Url: {}, Status: {}, Response: {:?}", - request_uri, - status_code, - response.body() - ), + response.body() + ); + return Err(match retry_after { + Some(retry_after) => { + HttpExportError::with_retry_after(status_code, retry_after, message) + } + None => HttpExportError::new(status_code, message), }); } diff --git a/opentelemetry-otlp/src/exporter/http/mod.rs b/opentelemetry-otlp/src/exporter/http/mod.rs index a21a04570b..1f7f3641cf 100644 --- a/opentelemetry-otlp/src/exporter/http/mod.rs +++ b/opentelemetry-otlp/src/exporter/http/mod.rs @@ -37,6 +37,27 @@ pub(crate) struct HttpExportError { pub message: String, } +#[cfg(feature = "http-retry")] +impl HttpExportError { + /// Create a new HttpExportError without retry-after header + pub(crate) fn new(status_code: u16, message: String) -> Self { + Self { + status_code, + retry_after: None, + message, + } + } + + /// Create a new HttpExportError with retry-after header + pub(crate) fn with_retry_after(status_code: u16, retry_after: String, message: String) -> Self { + Self { + status_code, + retry_after: Some(retry_after), + message, + } + } +} + #[cfg(feature = "http-retry")] /// Classify HTTP export errors for retry decisions pub(crate) fn classify_http_export_error(error: &HttpExportError) -> RetryErrorType { diff --git a/opentelemetry-otlp/src/exporter/http/trace.rs b/opentelemetry-otlp/src/exporter/http/trace.rs index feaada955b..073434f68a 100644 --- a/opentelemetry-otlp/src/exporter/http/trace.rs +++ b/opentelemetry-otlp/src/exporter/http/trace.rs @@ -47,16 +47,10 @@ impl SpanExporter for OtlpHttpClient { let client = self .client .lock() - .map_err(|e| HttpExportError { - status_code: 500, - retry_after: None, - message: format!("Mutex lock failed: {e}"), - })? + .map_err(|e| HttpExportError::new(500, format!("Mutex lock failed: {e}")))? .as_ref() - .ok_or_else(|| HttpExportError { - status_code: 500, - retry_after: None, - message: "Exporter already shutdown".to_string(), + .ok_or_else(|| { + HttpExportError::new(500, "Exporter already shutdown".to_string()) })? 
.clone(); @@ -72,10 +66,8 @@ impl SpanExporter for OtlpHttpClient { let mut request = request_builder .body(retry_data.body.clone().into()) - .map_err(|e| HttpExportError { - status_code: 400, - retry_after: None, - message: format!("Failed to build HTTP request: {e}"), + .map_err(|e| { + HttpExportError::new(400, format!("Failed to build HTTP request: {e}")) })?; for (k, v) in retry_data.headers.iter() { @@ -87,11 +79,7 @@ impl SpanExporter for OtlpHttpClient { // Send request let response = client.send_bytes(request).await.map_err(|e| { - HttpExportError { - status_code: 0, // Network error - retry_after: None, - message: format!("Network error: {e:?}"), - } + HttpExportError::new(0, format!("Network error: {e:?}")) // Network error })?; let status_code = response.status().as_u16(); @@ -102,15 +90,17 @@ impl SpanExporter for OtlpHttpClient { .map(|s| s.to_string()); if !response.status().is_success() { - return Err(HttpExportError { + let message = format!( + "HTTP export failed. Url: {}, Status: {}, Response: {:?}", + request_uri, status_code, - retry_after, - message: format!( - "HTTP export failed. Url: {}, Status: {}, Response: {:?}", - request_uri, - status_code, - response.body() - ), + response.body() + ); + return Err(match retry_after { + Some(retry_after) => { + HttpExportError::with_retry_after(status_code, retry_after, message) + } + None => HttpExportError::new(status_code, message), }); } From 579efce19de71fe45c59009155f85bbc503a79e1 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Tue, 2 Sep 2025 12:40:46 +0200 Subject: [PATCH 19/27] chore: factor retry logic out --- opentelemetry-otlp/src/exporter/http/logs.rs | 156 +--------------- .../src/exporter/http/metrics.rs | 167 +----------------- opentelemetry-otlp/src/exporter/http/mod.rs | 160 ++++++++++++++++- opentelemetry-otlp/src/exporter/http/trace.rs | 163 +---------------- 4 files changed, 164 insertions(+), 482 deletions(-) diff --git a/opentelemetry-otlp/src/exporter/http/logs.rs b/opentelemetry-otlp/src/exporter/http/logs.rs index a1e584337e..8690250f81 100644 --- a/opentelemetry-otlp/src/exporter/http/logs.rs +++ b/opentelemetry-otlp/src/exporter/http/logs.rs @@ -1,166 +1,16 @@ use super::OtlpHttpClient; -use http::header::CONTENT_ENCODING; -use http::{header::CONTENT_TYPE, Method}; -use opentelemetry::otel_debug; use opentelemetry_sdk::error::{OTelSdkError, OTelSdkResult}; use opentelemetry_sdk::logs::{LogBatch, LogExporter}; -#[cfg(feature = "http-retry")] -use std::sync::Arc; use std::time; -#[cfg(feature = "http-retry")] -use super::{classify_http_export_error, HttpExportError, HttpRetryData}; -#[cfg(feature = "http-retry")] -use opentelemetry_sdk::retry::{retry_with_backoff, RetryPolicy}; -#[cfg(feature = "http-retry")] -use opentelemetry_sdk::runtime::Tokio; - impl LogExporter for OtlpHttpClient { - #[cfg(feature = "http-retry")] async fn export(&self, batch: LogBatch<'_>) -> OTelSdkResult { - let policy = RetryPolicy { - max_retries: 3, - initial_delay_ms: 100, - max_delay_ms: 1600, - jitter_ms: 100, - }; - - // Build request body once before retry loop since LogBatch contains borrowed data - let (body, content_type, content_encoding) = self - .build_logs_export_body(batch) - .map_err(OTelSdkError::InternalFailure)?; - - let retry_data = Arc::new(HttpRetryData { - body, - headers: self.headers.clone(), - endpoint: self.collector_endpoint.to_string(), - }); - - retry_with_backoff( - Tokio, - policy, - classify_http_export_error, + self.export_http_with_retry( + batch, + 
OtlpHttpClient::build_logs_export_body, "HttpLogsClient.Export", - || async { - // Get client - let client = self - .client - .lock() - .map_err(|e| HttpExportError::new(500, format!("Mutex lock failed: {e}")))? - .as_ref() - .ok_or_else(|| { - HttpExportError::new(500, "Exporter already shutdown".to_string()) - })? - .clone(); - - // Build HTTP request - let mut request_builder = http::Request::builder() - .method(Method::POST) - .uri(&retry_data.endpoint) - .header(CONTENT_TYPE, content_type); - - if let Some(encoding) = content_encoding { - request_builder = request_builder.header(CONTENT_ENCODING, encoding); - } - - let mut request = request_builder - .body(retry_data.body.clone().into()) - .map_err(|e| { - HttpExportError::new(400, format!("Failed to build HTTP request: {e}")) - })?; - - for (k, v) in retry_data.headers.iter() { - request.headers_mut().insert(k.clone(), v.clone()); - } - - let request_uri = request.uri().to_string(); - otel_debug!(name: "HttpLogsClient.ExportStarted"); - - // Send request - let response = client.send_bytes(request).await.map_err(|e| { - HttpExportError::new(0, format!("Network error: {e:?}")) // Network error - })?; - - let status_code = response.status().as_u16(); - let retry_after = response - .headers() - .get("retry-after") - .and_then(|v| v.to_str().ok()) - .map(|s| s.to_string()); - - if !response.status().is_success() { - let message = format!( - "HTTP export failed. Url: {}, Status: {}, Response: {:?}", - request_uri, - status_code, - response.body() - ); - return Err(match retry_after { - Some(retry_after) => { - HttpExportError::with_retry_after(status_code, retry_after, message) - } - None => HttpExportError::new(status_code, message), - }); - } - - otel_debug!(name: "HttpLogsClient.ExportSucceeded"); - Ok(()) - }, ) .await - .map_err(|e| OTelSdkError::InternalFailure(e.message)) - } - - #[cfg(not(feature = "http-retry"))] - async fn export(&self, batch: LogBatch<'_>) -> OTelSdkResult { - let client = self - .client - .lock() - .map_err(|e| OTelSdkError::InternalFailure(format!("Mutex lock failed: {e}")))? - .clone() - .ok_or(OTelSdkError::AlreadyShutdown)?; - - let (body, content_type, content_encoding) = self - .build_logs_export_body(batch) - .map_err(OTelSdkError::InternalFailure)?; - - let mut request_builder = http::Request::builder() - .method(Method::POST) - .uri(&self.collector_endpoint) - .header(CONTENT_TYPE, content_type); - - if let Some(encoding) = content_encoding { - request_builder = request_builder.header(CONTENT_ENCODING, encoding); - } - - let mut request = request_builder - .body(body.into()) - .map_err(|e| OTelSdkError::InternalFailure(e.to_string()))?; - - for (k, v) in self.headers.iter() { - request.headers_mut().insert(k.clone(), v.clone()); - } - - let request_uri = request.uri().to_string(); - otel_debug!(name: "HttpLogsClient.ExportStarted"); - let response = client - .send_bytes(request) - .await - .map_err(|e| OTelSdkError::InternalFailure(format!("{e:?}")))?; - - if !response.status().is_success() { - let error = format!( - "OpenTelemetry logs export failed. 
Url: {}, Status Code: {}, Response: {:?}", - request_uri, - response.status().as_u16(), - response.body() - ); - otel_debug!(name: "HttpLogsClient.ExportFailed", error = &error); - return Err(OTelSdkError::InternalFailure(error)); - } - - otel_debug!(name: "HttpLogsClient.ExportSucceeded"); - Ok(()) } fn shutdown_with_timeout(&self, _timeout: time::Duration) -> OTelSdkResult { diff --git a/opentelemetry-otlp/src/exporter/http/metrics.rs b/opentelemetry-otlp/src/exporter/http/metrics.rs index 0f517b3839..6acee3821b 100644 --- a/opentelemetry-otlp/src/exporter/http/metrics.rs +++ b/opentelemetry-otlp/src/exporter/http/metrics.rs @@ -1,174 +1,19 @@ -use std::sync::Arc; - use crate::metric::MetricsClient; -use http::{header::CONTENT_TYPE, Method}; -use opentelemetry::otel_debug; use opentelemetry_sdk::error::{OTelSdkError, OTelSdkResult}; use opentelemetry_sdk::metrics::data::ResourceMetrics; use super::OtlpHttpClient; -#[cfg(feature = "http-retry")] -use super::{classify_http_export_error, HttpExportError, HttpRetryData}; -#[cfg(feature = "http-retry")] -use opentelemetry_sdk::retry::{retry_with_backoff, RetryPolicy}; -#[cfg(feature = "http-retry")] -use opentelemetry_sdk::runtime::Tokio; - impl MetricsClient for OtlpHttpClient { - #[cfg(feature = "http-retry")] async fn export(&self, metrics: &ResourceMetrics) -> OTelSdkResult { - let policy = RetryPolicy { - max_retries: 3, - initial_delay_ms: 100, - max_delay_ms: 1600, - jitter_ms: 100, + let build_body_wrapper = |client: &OtlpHttpClient, metrics: &ResourceMetrics| { + client + .build_metrics_export_body(metrics) + .ok_or_else(|| "Failed to serialize metrics".to_string()) }; - // Build request body once before retry loop - let (body, content_type, content_encoding) = - self.build_metrics_export_body(metrics).ok_or_else(|| { - OTelSdkError::InternalFailure("Failed to serialize metrics".to_string()) - })?; - - let retry_data = Arc::new(HttpRetryData { - body, - headers: self.headers.clone(), - endpoint: self.collector_endpoint.to_string(), - }); - - retry_with_backoff( - Tokio, - policy, - classify_http_export_error, - "HttpMetricsClient.Export", - || async { - // Get client - let client = self - .client - .lock() - .map_err(|e| HttpExportError::new(500, format!("Mutex lock failed: {e}")))? - .as_ref() - .ok_or_else(|| { - HttpExportError::new(500, "Exporter already shutdown".to_string()) - })? - .clone(); - - // Build HTTP request - let mut request_builder = http::Request::builder() - .method(Method::POST) - .uri(&retry_data.endpoint) - .header(CONTENT_TYPE, content_type); - - if let Some(encoding) = content_encoding { - request_builder = request_builder.header("Content-Encoding", encoding); - } - - let mut request = request_builder - .body(retry_data.body.clone().into()) - .map_err(|e| { - HttpExportError::new(400, format!("Failed to build HTTP request: {e}")) - })?; - - for (k, v) in retry_data.headers.iter() { - request.headers_mut().insert(k.clone(), v.clone()); - } - - let request_uri = request.uri().to_string(); - otel_debug!(name: "HttpMetricsClient.ExportStarted"); - - // Send request - let response = client.send_bytes(request).await.map_err(|e| { - HttpExportError::new(0, format!("Network error: {e:?}")) // Network error - })?; - - let status_code = response.status().as_u16(); - let retry_after = response - .headers() - .get("retry-after") - .and_then(|v| v.to_str().ok()) - .map(|s| s.to_string()); - - if !response.status().is_success() { - let message = format!( - "HTTP export failed. 
Url: {}, Status: {}, Response: {:?}", - request_uri, - status_code, - response.body() - ); - return Err(match retry_after { - Some(retry_after) => { - HttpExportError::with_retry_after(status_code, retry_after, message) - } - None => HttpExportError::new(status_code, message), - }); - } - - otel_debug!(name: "HttpMetricsClient.ExportSucceeded"); - Ok(()) - }, - ) - .await - .map_err(|e| OTelSdkError::InternalFailure(e.message)) - } - - #[cfg(not(feature = "http-retry"))] - async fn export(&self, metrics: &ResourceMetrics) -> OTelSdkResult { - let client = self - .client - .lock() - .map_err(|e| OTelSdkError::InternalFailure(format!("Failed to acquire lock: {e:?}"))) - .and_then(|g| match &*g { - Some(client) => Ok(Arc::clone(client)), - _ => Err(OTelSdkError::AlreadyShutdown), - })?; - - let (body, content_type, content_encoding) = - self.build_metrics_export_body(metrics).ok_or_else(|| { - OTelSdkError::InternalFailure("Failed to serialize metrics".to_string()) - })?; - - let mut request_builder = http::Request::builder() - .method(Method::POST) - .uri(&self.collector_endpoint) - .header(CONTENT_TYPE, content_type); - - if let Some(encoding) = content_encoding { - request_builder = request_builder.header("Content-Encoding", encoding); - } - - let mut request = request_builder - .body(body.into()) - .map_err(|e| OTelSdkError::InternalFailure(format!("{e:?}")))?; - - for (k, v) in self.headers.iter() { - request.headers_mut().insert(k.clone(), v.clone()); - } - - otel_debug!(name: "HttpMetricsClient.ExportStarted"); - let result = client.send_bytes(request).await; - - match result { - Ok(response) => { - if response.status().is_success() { - otel_debug!(name: "HttpMetricsClient.ExportSucceeded"); - Ok(()) - } else { - let error = format!( - "OpenTelemetry metrics export failed. 
Status Code: {}, Response: {:?}", - response.status().as_u16(), - response.body() - ); - otel_debug!(name: "HttpMetricsClient.ExportFailed", error = &error); - Err(OTelSdkError::InternalFailure(error)) - } - } - Err(e) => { - let error = format!("{e:?}"); - otel_debug!(name: "HttpMetricsClient.ExportFailed", error = &error); - Err(OTelSdkError::InternalFailure(error)) - } - } + self.export_http_with_retry(metrics, build_body_wrapper, "HttpMetricsClient.Export") + .await } fn shutdown(&self) -> OTelSdkResult { diff --git a/opentelemetry-otlp/src/exporter/http/mod.rs b/opentelemetry-otlp/src/exporter/http/mod.rs index 1f7f3641cf..d04c8badc7 100644 --- a/opentelemetry-otlp/src/exporter/http/mod.rs +++ b/opentelemetry-otlp/src/exporter/http/mod.rs @@ -28,31 +28,39 @@ use crate::retry_classification::http::classify_http_error; use opentelemetry_sdk::retry::RetryErrorType; // Shared HTTP retry functionality -#[cfg(feature = "http-retry")] /// HTTP-specific error wrapper for retry classification #[derive(Debug)] pub(crate) struct HttpExportError { + #[cfg(feature = "http-retry")] pub status_code: u16, + #[cfg(feature = "http-retry")] pub retry_after: Option, pub message: String, } -#[cfg(feature = "http-retry")] impl HttpExportError { /// Create a new HttpExportError without retry-after header - pub(crate) fn new(status_code: u16, message: String) -> Self { + pub(crate) fn new(_status_code: u16, message: String) -> Self { Self { - status_code, + #[cfg(feature = "http-retry")] + status_code: _status_code, + #[cfg(feature = "http-retry")] retry_after: None, message, } } /// Create a new HttpExportError with retry-after header - pub(crate) fn with_retry_after(status_code: u16, retry_after: String, message: String) -> Self { + pub(crate) fn with_retry_after( + _status_code: u16, + _retry_after: String, + message: String, + ) -> Self { Self { - status_code, - retry_after: Some(retry_after), + #[cfg(feature = "http-retry")] + status_code: _status_code, + #[cfg(feature = "http-retry")] + retry_after: Some(_retry_after), message, } } @@ -64,7 +72,6 @@ pub(crate) fn classify_http_export_error(error: &HttpExportError) -> RetryErrorT classify_http_error(error.status_code, error.retry_after.as_deref()) } -#[cfg(feature = "http-retry")] /// Shared HTTP request data for retry attempts - optimizes Arc usage by bundling all data /// we need to pass into the retry handler #[derive(Debug)] @@ -360,6 +367,143 @@ pub(crate) struct OtlpHttpClient { } impl OtlpHttpClient { + /// Shared HTTP export logic used by all exporters with retry support + async fn export_http_with_retry( + &self, + data: T, + build_body_fn: F, + operation_name: &'static str, + ) -> opentelemetry_sdk::error::OTelSdkResult + where + F: Fn(&Self, T) -> Result<(Vec, &'static str, Option<&'static str>), String>, + { + #[cfg(feature = "http-retry")] + { + use opentelemetry_sdk::retry::{retry_with_backoff, RetryPolicy}; + use opentelemetry_sdk::runtime::Tokio; + + let policy = RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }; + + // Build request body once before retry loop + let (body, content_type, content_encoding) = build_body_fn(self, data) + .map_err(opentelemetry_sdk::error::OTelSdkError::InternalFailure)?; + + let retry_data = Arc::new(HttpRetryData { + body, + headers: self.headers.clone(), + endpoint: self.collector_endpoint.to_string(), + }); + + retry_with_backoff( + Tokio, + policy, + classify_http_export_error, + operation_name, + || async { + self.export_http_once( + &retry_data, + 
content_type, + content_encoding, + operation_name, + ) + .await + }, + ) + .await + .map_err(|e| opentelemetry_sdk::error::OTelSdkError::InternalFailure(e.message)) + } + + #[cfg(not(feature = "http-retry"))] + { + let (body, content_type, content_encoding) = build_body_fn(self, data) + .map_err(opentelemetry_sdk::error::OTelSdkError::InternalFailure)?; + + let retry_data = HttpRetryData { + body, + headers: self.headers.clone(), + endpoint: self.collector_endpoint.to_string(), + }; + + self.export_http_once(&retry_data, content_type, content_encoding, operation_name) + .await + .map_err(|e| opentelemetry_sdk::error::OTelSdkError::InternalFailure(e.message)) + } + } + + /// Single HTTP export attempt - shared between retry and no-retry paths + async fn export_http_once( + &self, + retry_data: &HttpRetryData, + content_type: &'static str, + content_encoding: Option<&'static str>, + _operation_name: &'static str, + ) -> Result<(), HttpExportError> { + // Get client + let client = self + .client + .lock() + .map_err(|e| HttpExportError::new(500, format!("Mutex lock failed: {e}")))? + .as_ref() + .ok_or_else(|| HttpExportError::new(500, "Exporter already shutdown".to_string()))? + .clone(); + + // Build HTTP request + let mut request_builder = http::Request::builder() + .method(http::Method::POST) + .uri(&retry_data.endpoint) + .header(http::header::CONTENT_TYPE, content_type); + + if let Some(encoding) = content_encoding { + request_builder = request_builder.header("Content-Encoding", encoding); + } + + let mut request = request_builder + .body(retry_data.body.clone().into()) + .map_err(|e| HttpExportError::new(400, format!("Failed to build HTTP request: {e}")))?; + + for (k, v) in retry_data.headers.iter() { + request.headers_mut().insert(k.clone(), v.clone()); + } + + let request_uri = request.uri().to_string(); + otel_debug!(name: "HttpClient.ExportStarted"); + + // Send request + let response = client.send_bytes(request).await.map_err(|e| { + HttpExportError::new(0, format!("Network error: {e:?}")) // Network error + })?; + + let status_code = response.status().as_u16(); + let retry_after = response + .headers() + .get("retry-after") + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + + if !response.status().is_success() { + let message = format!( + "HTTP export failed. Url: {}, Status: {}, Response: {:?}", + request_uri, + status_code, + response.body() + ); + return Err(match retry_after { + Some(retry_after) => { + HttpExportError::with_retry_after(status_code, retry_after, message) + } + None => HttpExportError::new(status_code, message), + }); + } + + otel_debug!(name: "HttpClient.ExportSucceeded"); + Ok(()) + } + /// Compress data using gzip or zstd if the user has requested it and the relevant feature /// has been enabled. If the user has requested it but the feature has not been enabled, /// we should catch this at exporter build time and never get here. 
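Commentary (not part of the patch): the shared path above funnels every failure into HttpExportError, and classify_http_export_error hands the status code plus any Retry-After value to retry_classification::http::classify_http_error, which decides whether the retry loop backs off, waits for a server-supplied delay, or gives up. A rough, self-contained sketch of that flow follows; the stand-in enum and the exact status-code policy are assumptions for illustration only — the real classifier lives in retry_classification.rs and RetryErrorType in opentelemetry_sdk::retry.

use std::time::Duration;

// Stand-in for opentelemetry_sdk::retry::RetryErrorType (variant names assumed).
#[derive(Debug, PartialEq)]
enum RetryErrorType {
    NonRetryable,
    Retryable,
    Throttled(Duration),
}

// Illustrative classifier: numeric Retry-After seconds are honoured (capped at
// 10 minutes, as parse_retry_after does); otherwise 429/5xx and the status 0
// network-error sentinel used above get a plain exponential-backoff retry.
fn classify(status: u16, retry_after: Option<&str>) -> RetryErrorType {
    let throttle = retry_after
        .and_then(|s| s.trim().parse::<u64>().ok())
        .map(|secs| Duration::from_secs(secs.min(600)));
    match (status, throttle) {
        (429, Some(d)) | (503, Some(d)) => RetryErrorType::Throttled(d),
        (429 | 500..=599 | 0, _) => RetryErrorType::Retryable,
        _ => RetryErrorType::NonRetryable,
    }
}

fn main() {
    assert_eq!(
        classify(429, Some("30")),
        RetryErrorType::Throttled(Duration::from_secs(30))
    );
    assert_eq!(classify(502, None), RetryErrorType::Retryable);
    assert_eq!(classify(400, None), RetryErrorType::NonRetryable);
}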
diff --git a/opentelemetry-otlp/src/exporter/http/trace.rs b/opentelemetry-otlp/src/exporter/http/trace.rs index 073434f68a..c77e59efde 100644 --- a/opentelemetry-otlp/src/exporter/http/trace.rs +++ b/opentelemetry-otlp/src/exporter/http/trace.rs @@ -1,174 +1,17 @@ -use std::sync::Arc; - use super::OtlpHttpClient; -use http::{header::CONTENT_TYPE, Method}; -use opentelemetry::otel_debug; use opentelemetry_sdk::{ error::{OTelSdkError, OTelSdkResult}, trace::{SpanData, SpanExporter}, }; -#[cfg(feature = "http-retry")] -use super::{classify_http_export_error, HttpExportError, HttpRetryData}; -#[cfg(feature = "http-retry")] -use opentelemetry_sdk::retry::{retry_with_backoff, RetryPolicy}; -#[cfg(feature = "http-retry")] -use opentelemetry_sdk::runtime::Tokio; - impl SpanExporter for OtlpHttpClient { - #[cfg(feature = "http-retry")] async fn export(&self, batch: Vec) -> OTelSdkResult { - let policy = RetryPolicy { - max_retries: 3, - initial_delay_ms: 100, - max_delay_ms: 1600, - jitter_ms: 100, - }; - - // Build request body once before retry loop - let (body, content_type, content_encoding) = - self.build_trace_export_body(batch).map_err(|e| { - OTelSdkError::InternalFailure(format!("Failed to build request body: {e}")) - })?; - - let retry_data = Arc::new(HttpRetryData { - body, - headers: self.headers.clone(), - endpoint: self.collector_endpoint.to_string(), - }); - - retry_with_backoff( - Tokio, - policy, - classify_http_export_error, + self.export_http_with_retry( + batch, + OtlpHttpClient::build_trace_export_body, "HttpTracesClient.Export", - || async { - // Get client - let client = self - .client - .lock() - .map_err(|e| HttpExportError::new(500, format!("Mutex lock failed: {e}")))? - .as_ref() - .ok_or_else(|| { - HttpExportError::new(500, "Exporter already shutdown".to_string()) - })? - .clone(); - - // Build HTTP request - let mut request_builder = http::Request::builder() - .method(Method::POST) - .uri(&retry_data.endpoint) - .header(CONTENT_TYPE, content_type); - - if let Some(encoding) = content_encoding { - request_builder = request_builder.header("Content-Encoding", encoding); - } - - let mut request = request_builder - .body(retry_data.body.clone().into()) - .map_err(|e| { - HttpExportError::new(400, format!("Failed to build HTTP request: {e}")) - })?; - - for (k, v) in retry_data.headers.iter() { - request.headers_mut().insert(k.clone(), v.clone()); - } - - let request_uri = request.uri().to_string(); - otel_debug!(name: "HttpTracesClient.ExportStarted"); - - // Send request - let response = client.send_bytes(request).await.map_err(|e| { - HttpExportError::new(0, format!("Network error: {e:?}")) // Network error - })?; - - let status_code = response.status().as_u16(); - let retry_after = response - .headers() - .get("retry-after") - .and_then(|v| v.to_str().ok()) - .map(|s| s.to_string()); - - if !response.status().is_success() { - let message = format!( - "HTTP export failed. 
Url: {}, Status: {}, Response: {:?}", - request_uri, - status_code, - response.body() - ); - return Err(match retry_after { - Some(retry_after) => { - HttpExportError::with_retry_after(status_code, retry_after, message) - } - None => HttpExportError::new(status_code, message), - }); - } - - otel_debug!(name: "HttpTracesClient.ExportSucceeded"); - Ok(()) - }, ) .await - .map_err(|e| OTelSdkError::InternalFailure(e.message)) - } - - #[cfg(not(feature = "http-retry"))] - async fn export(&self, batch: Vec) -> OTelSdkResult { - let client = match self - .client - .lock() - .map_err(|e| OTelSdkError::InternalFailure(format!("Mutex lock failed: {e}"))) - .and_then(|g| match &*g { - Some(client) => Ok(Arc::clone(client)), - _ => Err(OTelSdkError::AlreadyShutdown), - }) { - Ok(client) => client, - Err(err) => return Err(err), - }; - - let (body, content_type, content_encoding) = match self.build_trace_export_body(batch) { - Ok(result) => result, - Err(e) => return Err(OTelSdkError::InternalFailure(e.to_string())), - }; - - let mut request_builder = http::Request::builder() - .method(Method::POST) - .uri(&self.collector_endpoint) - .header(CONTENT_TYPE, content_type); - - if let Some(encoding) = content_encoding { - request_builder = request_builder.header("Content-Encoding", encoding); - } - - let mut request = match request_builder.body(body.into()) { - Ok(req) => req, - Err(e) => return Err(OTelSdkError::InternalFailure(e.to_string())), - }; - - for (k, v) in self.headers.iter() { - request.headers_mut().insert(k.clone(), v.clone()); - } - - let request_uri = request.uri().to_string(); - otel_debug!(name: "HttpTracesClient.ExportStarted"); - let response = client - .send_bytes(request) - .await - .map_err(|e| OTelSdkError::InternalFailure(format!("{e:?}")))?; - - if !response.status().is_success() { - let error = format!( - "OpenTelemetry trace export failed. 
Url: {}, Status Code: {}, Response: {:?}", - request_uri, - response.status().as_u16(), - response.body() - ); - otel_debug!(name: "HttpTracesClient.ExportFailed", error = &error); - return Err(OTelSdkError::InternalFailure(error)); - } - - otel_debug!(name: "HttpTracesClient.ExportSucceeded"); - Ok(()) } fn shutdown(&mut self) -> OTelSdkResult { From f0fe377ecce1fcf1d9d87d1a1f94287a7f69e9c8 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Fri, 5 Sep 2025 11:11:20 +0200 Subject: [PATCH 20/27] chore: configurable retry policy with default --- opentelemetry-otlp/src/exporter/http/mod.rs | 118 ++++++++++++++++-- opentelemetry-otlp/src/exporter/tonic/logs.rs | 17 +-- .../src/exporter/tonic/metrics.rs | 17 +-- opentelemetry-otlp/src/exporter/tonic/mod.rs | 76 +++++++++-- .../src/exporter/tonic/trace.rs | 17 +-- opentelemetry-otlp/src/lib.rs | 3 + opentelemetry-sdk/src/retry.rs | 2 +- 7 files changed, 206 insertions(+), 44 deletions(-) diff --git a/opentelemetry-otlp/src/exporter/http/mod.rs b/opentelemetry-otlp/src/exporter/http/mod.rs index d04c8badc7..296ebb6f3f 100644 --- a/opentelemetry-otlp/src/exporter/http/mod.rs +++ b/opentelemetry-otlp/src/exporter/http/mod.rs @@ -25,7 +25,7 @@ use std::time::Duration; #[cfg(feature = "http-retry")] use crate::retry_classification::http::classify_http_error; #[cfg(feature = "http-retry")] -use opentelemetry_sdk::retry::RetryErrorType; +use opentelemetry_sdk::retry::{RetryErrorType, RetryPolicy}; // Shared HTTP retry functionality /// HTTP-specific error wrapper for retry classification @@ -111,6 +111,10 @@ pub struct HttpConfig { /// The compression algorithm to use when communicating with the OTLP endpoint. compression: Option, + + /// The retry policy to use for HTTP requests. + #[cfg(feature = "http-retry")] + retry_policy: Option, } /// Configuration for the OTLP HTTP exporter. @@ -282,6 +286,8 @@ impl HttpExporterBuilder { self.exporter_config.protocol, timeout, compression, + #[cfg(feature = "http-retry")] + self.http_config.retry_policy.take(), )) } @@ -361,6 +367,8 @@ pub(crate) struct OtlpHttpClient { protocol: Protocol, _timeout: Duration, compression: Option, + #[cfg(feature = "http-retry")] + retry_policy: RetryPolicy, #[allow(dead_code)] // would be removed once we support set_resource for metrics and traces. 
resource: opentelemetry_proto::transform::common::tonic::ResourceAttributesWithSchema, @@ -379,16 +387,9 @@ impl OtlpHttpClient { { #[cfg(feature = "http-retry")] { - use opentelemetry_sdk::retry::{retry_with_backoff, RetryPolicy}; + use opentelemetry_sdk::retry::retry_with_backoff; use opentelemetry_sdk::runtime::Tokio; - let policy = RetryPolicy { - max_retries: 3, - initial_delay_ms: 100, - max_delay_ms: 1600, - jitter_ms: 100, - }; - // Build request body once before retry loop let (body, content_type, content_encoding) = build_body_fn(self, data) .map_err(opentelemetry_sdk::error::OTelSdkError::InternalFailure)?; @@ -401,7 +402,7 @@ impl OtlpHttpClient { retry_with_backoff( Tokio, - policy, + self.retry_policy.clone(), classify_http_export_error, operation_name, || async { @@ -544,6 +545,7 @@ impl OtlpHttpClient { protocol: Protocol, timeout: Duration, compression: Option, + #[cfg(feature = "http-retry")] retry_policy: Option, ) -> Self { OtlpHttpClient { client: Mutex::new(Some(client)), @@ -552,6 +554,13 @@ impl OtlpHttpClient { protocol, _timeout: timeout, compression, + #[cfg(feature = "http-retry")] + retry_policy: retry_policy.unwrap_or(RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }), resource: ResourceAttributesWithSchema::default(), } } @@ -722,6 +731,10 @@ pub trait WithHttpConfig { /// Set the compression algorithm to use when communicating with the collector. fn with_compression(self, compression: crate::Compression) -> Self; + + /// Set the retry policy for HTTP requests. + #[cfg(feature = "http-retry")] + fn with_retry_policy(self, policy: RetryPolicy) -> Self; } impl WithHttpConfig for B { @@ -746,6 +759,12 @@ impl WithHttpConfig for B { self.http_client_config().compression = Some(compression); self } + + #[cfg(feature = "http-retry")] + fn with_retry_policy(mut self, policy: RetryPolicy) -> Self { + self.http_client_config().retry_policy = Some(policy); + self + } } #[cfg(test)] @@ -991,6 +1010,8 @@ mod tests { client: None, headers: Some(initial_headers), compression: None, + #[cfg(feature = "http-retry")] + retry_policy: None, }, exporter_config: crate::ExportConfig::default(), }; @@ -1057,6 +1078,8 @@ mod tests { crate::Protocol::HttpBinary, std::time::Duration::from_secs(10), Some(crate::Compression::Gzip), + #[cfg(feature = "http-retry")] + None, ); // Test with some sample data @@ -1088,6 +1111,8 @@ mod tests { crate::Protocol::HttpBinary, std::time::Duration::from_secs(10), Some(crate::Compression::Zstd), + #[cfg(feature = "http-retry")] + None, ); // Test with some sample data @@ -1116,6 +1141,8 @@ mod tests { crate::Protocol::HttpBinary, std::time::Duration::from_secs(10), None, // No compression + #[cfg(feature = "http-retry")] + None, ); let body = vec![1, 2, 3, 4]; @@ -1137,6 +1164,8 @@ mod tests { crate::Protocol::HttpBinary, std::time::Duration::from_secs(10), Some(crate::Compression::Gzip), + #[cfg(feature = "http-retry")] + None, ); let body = vec![1, 2, 3, 4]; @@ -1159,6 +1188,8 @@ mod tests { crate::Protocol::HttpBinary, std::time::Duration::from_secs(10), Some(crate::Compression::Zstd), + #[cfg(feature = "http-retry")] + None, ); let body = vec![1, 2, 3, 4]; @@ -1221,6 +1252,8 @@ mod tests { protocol, std::time::Duration::from_secs(10), compression, + #[cfg(feature = "http-retry")] + None, ) } @@ -1454,5 +1487,70 @@ mod tests { Err(ExporterBuildError::UnsupportedCompressionAlgorithm(_)) )); } + + #[cfg(feature = "http-retry")] + #[test] + fn test_with_retry_policy() { + use 
super::super::HttpExporterBuilder; + use crate::WithHttpConfig; + use opentelemetry_sdk::retry::RetryPolicy; + + let custom_policy = RetryPolicy { + max_retries: 5, + initial_delay_ms: 200, + max_delay_ms: 3200, + jitter_ms: 50, + }; + + let builder = HttpExporterBuilder::default().with_retry_policy(custom_policy); + + // Verify the retry policy was set + let retry_policy = builder.http_config.retry_policy.as_ref().unwrap(); + assert_eq!(retry_policy.max_retries, 5); + assert_eq!(retry_policy.initial_delay_ms, 200); + assert_eq!(retry_policy.max_delay_ms, 3200); + assert_eq!(retry_policy.jitter_ms, 50); + } + + #[cfg(feature = "http-retry")] + #[test] + fn test_default_retry_policy_when_none_configured() { + let client = create_test_client(crate::Protocol::HttpBinary, None); + + // Verify default values are used + assert_eq!(client.retry_policy.max_retries, 3); + assert_eq!(client.retry_policy.initial_delay_ms, 100); + assert_eq!(client.retry_policy.max_delay_ms, 1600); + assert_eq!(client.retry_policy.jitter_ms, 100); + } + + #[cfg(feature = "http-retry")] + #[test] + fn test_custom_retry_policy_used() { + use opentelemetry_sdk::retry::RetryPolicy; + + let custom_policy = RetryPolicy { + max_retries: 7, + initial_delay_ms: 500, + max_delay_ms: 5000, + jitter_ms: 200, + }; + + let client = OtlpHttpClient::new( + std::sync::Arc::new(MockHttpClient), + "http://localhost:4318".parse().unwrap(), + HashMap::new(), + crate::Protocol::HttpBinary, + std::time::Duration::from_secs(10), + None, + Some(custom_policy), + ); + + // Verify custom values are used + assert_eq!(client.retry_policy.max_retries, 7); + assert_eq!(client.retry_policy.initial_delay_ms, 500); + assert_eq!(client.retry_policy.max_delay_ms, 5000); + assert_eq!(client.retry_policy.jitter_ms, 200); + } } } diff --git a/opentelemetry-otlp/src/exporter/tonic/logs.rs b/opentelemetry-otlp/src/exporter/tonic/logs.rs index fee1244686..650962ba28 100644 --- a/opentelemetry-otlp/src/exporter/tonic/logs.rs +++ b/opentelemetry-otlp/src/exporter/tonic/logs.rs @@ -20,6 +20,7 @@ use opentelemetry_sdk::runtime::Tokio; pub(crate) struct TonicLogsClient { inner: Mutex>, + retry_policy: RetryPolicy, #[allow(dead_code)] // would be removed once we support set_resource for metrics. 
resource: opentelemetry_proto::transform::common::tonic::ResourceAttributesWithSchema, @@ -41,6 +42,7 @@ impl TonicLogsClient { channel: Channel, interceptor: BoxInterceptor, compression: Option, + retry_policy: Option, ) -> Self { let mut client = LogsServiceClient::new(channel); if let Some(compression) = compression { @@ -56,6 +58,12 @@ impl TonicLogsClient { client, interceptor, })), + retry_policy: retry_policy.unwrap_or(RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }), resource: Default::default(), } } @@ -63,18 +71,11 @@ impl TonicLogsClient { impl LogExporter for TonicLogsClient { async fn export(&self, batch: LogBatch<'_>) -> OTelSdkResult { - let policy = RetryPolicy { - max_retries: 3, - initial_delay_ms: 100, - max_delay_ms: 1600, - jitter_ms: 100, - }; - let batch = Arc::new(batch); match retry_with_backoff( Tokio, - policy, + self.retry_policy.clone(), classify_tonic_status, "TonicLogsClient.Export", || async { diff --git a/opentelemetry-otlp/src/exporter/tonic/metrics.rs b/opentelemetry-otlp/src/exporter/tonic/metrics.rs index d6244b4bae..223df3fd2f 100644 --- a/opentelemetry-otlp/src/exporter/tonic/metrics.rs +++ b/opentelemetry-otlp/src/exporter/tonic/metrics.rs @@ -18,6 +18,7 @@ use opentelemetry_sdk::runtime::Tokio; pub(crate) struct TonicMetricsClient { inner: Mutex>, + retry_policy: RetryPolicy, } struct ClientInner { @@ -36,6 +37,7 @@ impl TonicMetricsClient { channel: Channel, interceptor: BoxInterceptor, compression: Option, + retry_policy: Option, ) -> Self { let mut client = MetricsServiceClient::new(channel); if let Some(compression) = compression { @@ -51,22 +53,21 @@ impl TonicMetricsClient { client, interceptor, })), + retry_policy: retry_policy.unwrap_or(RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }), } } } impl MetricsClient for TonicMetricsClient { async fn export(&self, metrics: &ResourceMetrics) -> OTelSdkResult { - let policy = RetryPolicy { - max_retries: 3, - initial_delay_ms: 100, - max_delay_ms: 1600, - jitter_ms: 100, - }; - match retry_with_backoff( Tokio, - policy, + self.retry_policy.clone(), classify_tonic_status, "TonicMetricsClient.Export", || async { diff --git a/opentelemetry-otlp/src/exporter/tonic/mod.rs b/opentelemetry-otlp/src/exporter/tonic/mod.rs index 6986107602..ee8f11f2a1 100644 --- a/opentelemetry-otlp/src/exporter/tonic/mod.rs +++ b/opentelemetry-otlp/src/exporter/tonic/mod.rs @@ -16,6 +16,9 @@ use super::{resolve_timeout, ExporterBuildError}; use crate::exporter::Compression; use crate::{ExportConfig, OTEL_EXPORTER_OTLP_ENDPOINT, OTEL_EXPORTER_OTLP_HEADERS}; +#[cfg(feature = "grpc-tonic")] +use opentelemetry_sdk::retry::RetryPolicy; + #[cfg(feature = "logs")] pub(crate) mod logs; @@ -40,6 +43,9 @@ pub struct TonicConfig { pub(crate) compression: Option, pub(crate) channel: Option, pub(crate) interceptor: Option, + /// The retry policy to use for gRPC requests. 
+ #[cfg(feature = "grpc-tonic")] + pub(crate) retry_policy: Option, } impl TryFrom for tonic::codec::CompressionEncoding { @@ -132,6 +138,8 @@ impl Default for TonicExporterBuilder { compression: None, channel: Option::default(), interceptor: Option::default(), + #[cfg(feature = "grpc-tonic")] + retry_policy: None, }, exporter_config: ExportConfig { protocol: crate::Protocol::Grpc, @@ -150,7 +158,7 @@ impl TonicExporterBuilder { signal_timeout_var: &str, signal_compression_var: &str, signal_headers_var: &str, - ) -> Result<(Channel, BoxInterceptor, Option), ExporterBuildError> { + ) -> Result<(Channel, BoxInterceptor, Option, Option), ExporterBuildError> { let compression = self.resolve_compression(signal_compression_var)?; let (headers_from_env, headers_for_logging) = parse_headers_from_env(signal_headers_var); @@ -181,9 +189,15 @@ impl TonicExporterBuilder { None => BoxInterceptor(Box::new(add_metadata)), }; + // Get retry policy before consuming self + #[cfg(feature = "grpc-tonic")] + let retry_policy = self.tonic_config.retry_policy.clone(); + // If a custom channel was provided, use that channel instead of creating one if let Some(channel) = self.tonic_config.channel { - return Ok((channel, interceptor, compression)); + return Ok((channel, interceptor, compression, + #[cfg(feature = "grpc-tonic")] retry_policy, + #[cfg(not(feature = "grpc-tonic"))] None)); } let config = self.exporter_config; @@ -211,7 +225,9 @@ impl TonicExporterBuilder { let channel = endpoint.timeout(timeout).connect_lazy(); otel_debug!(name: "TonicChannelBuilt", endpoint = endpoint_clone, timeout_in_millisecs = timeout.as_millis(), compression = format!("{:?}", compression), headers = format!("{:?}", headers_for_logging)); - Ok((channel, interceptor, compression)) + Ok((channel, interceptor, compression, + #[cfg(feature = "grpc-tonic")] retry_policy, + #[cfg(not(feature = "grpc-tonic"))] None)) } fn resolve_endpoint(default_endpoint_var: &str, provided_endpoint: Option) -> String { @@ -249,14 +265,14 @@ impl TonicExporterBuilder { otel_debug!(name: "LogsTonicChannelBuilding"); - let (channel, interceptor, compression) = self.build_channel( + let (channel, interceptor, compression, retry_policy) = self.build_channel( crate::logs::OTEL_EXPORTER_OTLP_LOGS_ENDPOINT, crate::logs::OTEL_EXPORTER_OTLP_LOGS_TIMEOUT, crate::logs::OTEL_EXPORTER_OTLP_LOGS_COMPRESSION, crate::logs::OTEL_EXPORTER_OTLP_LOGS_HEADERS, )?; - let client = TonicLogsClient::new(channel, interceptor, compression); + let client = TonicLogsClient::new(channel, interceptor, compression, retry_policy); Ok(crate::logs::LogExporter::from_tonic(client)) } @@ -272,14 +288,14 @@ impl TonicExporterBuilder { otel_debug!(name: "MetricsTonicChannelBuilding"); - let (channel, interceptor, compression) = self.build_channel( + let (channel, interceptor, compression, retry_policy) = self.build_channel( crate::metric::OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, crate::metric::OTEL_EXPORTER_OTLP_METRICS_TIMEOUT, crate::metric::OTEL_EXPORTER_OTLP_METRICS_COMPRESSION, crate::metric::OTEL_EXPORTER_OTLP_METRICS_HEADERS, )?; - let client = TonicMetricsClient::new(channel, interceptor, compression); + let client = TonicMetricsClient::new(channel, interceptor, compression, retry_policy); Ok(MetricExporter::from_tonic(client, temporality)) } @@ -291,14 +307,14 @@ impl TonicExporterBuilder { otel_debug!(name: "TracesTonicChannelBuilding"); - let (channel, interceptor, compression) = self.build_channel( + let (channel, interceptor, compression, retry_policy) = self.build_channel( 
crate::span::OTEL_EXPORTER_OTLP_TRACES_ENDPOINT, crate::span::OTEL_EXPORTER_OTLP_TRACES_TIMEOUT, crate::span::OTEL_EXPORTER_OTLP_TRACES_COMPRESSION, crate::span::OTEL_EXPORTER_OTLP_TRACES_HEADERS, )?; - let client = TonicTracesClient::new(channel, interceptor, compression); + let client = TonicTracesClient::new(channel, interceptor, compression, retry_policy); Ok(crate::SpanExporter::from_tonic(client)) } @@ -475,6 +491,10 @@ pub trait WithTonicConfig { fn with_interceptor(self, interceptor: I) -> Self where I: tonic::service::Interceptor + Clone + Send + Sync + 'static; + + /// Set the retry policy for gRPC requests. + #[cfg(feature = "grpc-tonic")] + fn with_retry_policy(self, policy: RetryPolicy) -> Self; } impl WithTonicConfig for B { @@ -516,6 +536,12 @@ impl WithTonicConfig for B { self.tonic_config().interceptor = Some(BoxInterceptor(Box::new(interceptor))); self } + + #[cfg(feature = "grpc-tonic")] + fn with_retry_policy(mut self, policy: RetryPolicy) -> Self { + self.tonic_config().retry_policy = Some(policy); + self + } } #[cfg(test)] @@ -748,4 +774,36 @@ mod tests { assert_eq!(url, "http://localhost:4317"); }); } + + #[cfg(feature = "grpc-tonic")] + #[test] + fn test_with_retry_policy() { + use crate::WithTonicConfig; + use opentelemetry_sdk::retry::RetryPolicy; + + let custom_policy = RetryPolicy { + max_retries: 5, + initial_delay_ms: 200, + max_delay_ms: 3200, + jitter_ms: 50, + }; + + let builder = TonicExporterBuilder::default().with_retry_policy(custom_policy); + + // Verify the retry policy was set + let retry_policy = builder.tonic_config.retry_policy.as_ref().unwrap(); + assert_eq!(retry_policy.max_retries, 5); + assert_eq!(retry_policy.initial_delay_ms, 200); + assert_eq!(retry_policy.max_delay_ms, 3200); + assert_eq!(retry_policy.jitter_ms, 50); + } + + #[cfg(feature = "grpc-tonic")] + #[test] + fn test_default_retry_policy_when_none_configured() { + // This test requires us to create a tonic client, but we can't easily do that without + // a channel in a unit test. The default behavior is tested implicitly in integration tests. + let builder = TonicExporterBuilder::default(); + assert!(builder.tonic_config.retry_policy.is_none()); + } } diff --git a/opentelemetry-otlp/src/exporter/tonic/trace.rs b/opentelemetry-otlp/src/exporter/tonic/trace.rs index a5239b4602..5e32660998 100644 --- a/opentelemetry-otlp/src/exporter/tonic/trace.rs +++ b/opentelemetry-otlp/src/exporter/tonic/trace.rs @@ -22,6 +22,7 @@ use opentelemetry_sdk::runtime::Tokio; pub(crate) struct TonicTracesClient { inner: Option, + retry_policy: RetryPolicy, #[allow(dead_code)] // would be removed once we support set_resource for metrics. 
resource: opentelemetry_proto::transform::common::tonic::ResourceAttributesWithSchema, @@ -43,6 +44,7 @@ impl TonicTracesClient { channel: Channel, interceptor: BoxInterceptor, compression: Option, + retry_policy: Option, ) -> Self { let mut client = TraceServiceClient::new(channel); if let Some(compression) = compression { @@ -58,6 +60,12 @@ impl TonicTracesClient { client, interceptor: Mutex::new(interceptor), }), + retry_policy: retry_policy.unwrap_or(RetryPolicy { + max_retries: 3, + initial_delay_ms: 100, + max_delay_ms: 1600, + jitter_ms: 100, + }), resource: Default::default(), } } @@ -65,18 +73,11 @@ impl TonicTracesClient { impl SpanExporter for TonicTracesClient { async fn export(&self, batch: Vec) -> OTelSdkResult { - let policy = RetryPolicy { - max_retries: 3, - initial_delay_ms: 100, - max_delay_ms: 1600, - jitter_ms: 100, - }; - let batch = Arc::new(batch); match retry_with_backoff( Tokio, - policy, + self.retry_policy.clone(), classify_tonic_status, "TonicTracesClient.Export", || async { diff --git a/opentelemetry-otlp/src/lib.rs b/opentelemetry-otlp/src/lib.rs index e8de66d273..ac32e38732 100644 --- a/opentelemetry-otlp/src/lib.rs +++ b/opentelemetry-otlp/src/lib.rs @@ -409,6 +409,9 @@ pub use crate::exporter::{ OTEL_EXPORTER_OTLP_TIMEOUT_DEFAULT, }; +#[cfg(feature = "http-retry")] +pub use opentelemetry_sdk::retry::RetryPolicy; + /// Type to indicate the builder does not have a client set. #[derive(Debug, Default, Clone)] pub struct NoExporterBuilderSet; diff --git a/opentelemetry-sdk/src/retry.rs b/opentelemetry-sdk/src/retry.rs index 48f4cf49a7..17d1b183b0 100644 --- a/opentelemetry-sdk/src/retry.rs +++ b/opentelemetry-sdk/src/retry.rs @@ -32,7 +32,7 @@ pub enum RetryErrorType { } /// Configuration for retry policy. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct RetryPolicy { /// Maximum number of retry attempts. 
pub max_retries: usize, From ff93dd518a93856f0568b560f109708baaf60b7e Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Fri, 5 Sep 2025 11:29:32 +0200 Subject: [PATCH 21/27] chore: modify tonic exporters so we don't _need_ the async runtime if we don't want retry --- opentelemetry-otlp/Cargo.toml | 8 +- opentelemetry-otlp/src/exporter/http/mod.rs | 62 +++++----- opentelemetry-otlp/src/exporter/tonic/logs.rs | 11 +- .../src/exporter/tonic/metrics.rs | 11 +- opentelemetry-otlp/src/exporter/tonic/mod.rs | 108 +++++++++++++++--- .../src/exporter/tonic/trace.rs | 11 +- opentelemetry-otlp/src/lib.rs | 4 +- 7 files changed, 152 insertions(+), 63 deletions(-) diff --git a/opentelemetry-otlp/Cargo.toml b/opentelemetry-otlp/Cargo.toml index 10939cac5f..95b8c27166 100644 --- a/opentelemetry-otlp/Cargo.toml +++ b/opentelemetry-otlp/Cargo.toml @@ -70,10 +70,13 @@ serialize = ["serde", "serde_json"] default = ["http-proto", "reqwest-blocking-client", "trace", "metrics", "logs", "internal-logs"] # grpc using tonic -grpc-tonic = ["tonic", "tonic-types", "prost", "http", "tokio", "opentelemetry-proto/gen-tonic", "opentelemetry_sdk/rt-tokio", "opentelemetry_sdk/experimental_async_runtime"] +grpc-tonic = ["tonic", "tonic-types", "prost", "http", "tokio", "opentelemetry-proto/gen-tonic"] gzip-tonic = ["tonic/gzip"] zstd-tonic = ["tonic/zstd"] +# grpc with retry support +experimental-grpc-retry = ["grpc-tonic", "opentelemetry_sdk/experimental_async_runtime", "opentelemetry_sdk/rt-tokio"] + # http compression gzip-http = ["flate2"] zstd-http = ["zstd"] @@ -85,8 +88,7 @@ tls-webpki-roots = ["tls", "tonic/tls-webpki-roots"] http-proto = ["prost", "opentelemetry-http", "opentelemetry-proto/gen-tonic-messages", "http", "trace", "metrics"] # http with retry support. -# What should we do with this? We need the async_runtime. gRPC exporters already need it. 
-http-retry = ["opentelemetry_sdk/experimental_async_runtime", "opentelemetry_sdk/rt-tokio", "tokio"] +experimental-http-retry = ["opentelemetry_sdk/experimental_async_runtime", "opentelemetry_sdk/rt-tokio", "tokio"] http-json = ["serde_json", "prost", "opentelemetry-http", "opentelemetry-proto/gen-tonic-messages", "opentelemetry-proto/with-serde", "http", "trace", "metrics"] reqwest-blocking-client = ["reqwest/blocking", "opentelemetry-http/reqwest-blocking"] diff --git a/opentelemetry-otlp/src/exporter/http/mod.rs b/opentelemetry-otlp/src/exporter/http/mod.rs index 296ebb6f3f..0f57a0292e 100644 --- a/opentelemetry-otlp/src/exporter/http/mod.rs +++ b/opentelemetry-otlp/src/exporter/http/mod.rs @@ -22,18 +22,18 @@ use std::str::FromStr; use std::sync::{Arc, Mutex}; use std::time::Duration; -#[cfg(feature = "http-retry")] +#[cfg(feature = "experimental-http-retry")] use crate::retry_classification::http::classify_http_error; -#[cfg(feature = "http-retry")] +#[cfg(feature = "experimental-http-retry")] use opentelemetry_sdk::retry::{RetryErrorType, RetryPolicy}; // Shared HTTP retry functionality /// HTTP-specific error wrapper for retry classification #[derive(Debug)] pub(crate) struct HttpExportError { - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] pub status_code: u16, - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] pub retry_after: Option, pub message: String, } @@ -42,9 +42,9 @@ impl HttpExportError { /// Create a new HttpExportError without retry-after header pub(crate) fn new(_status_code: u16, message: String) -> Self { Self { - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] status_code: _status_code, - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] retry_after: None, message, } @@ -57,16 +57,16 @@ impl HttpExportError { message: String, ) -> Self { Self { - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] status_code: _status_code, - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] retry_after: Some(_retry_after), message, } } } -#[cfg(feature = "http-retry")] +#[cfg(feature = "experimental-http-retry")] /// Classify HTTP export errors for retry decisions pub(crate) fn classify_http_export_error(error: &HttpExportError) -> RetryErrorType { classify_http_error(error.status_code, error.retry_after.as_deref()) @@ -113,7 +113,7 @@ pub struct HttpConfig { compression: Option, /// The retry policy to use for HTTP requests. - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] retry_policy: Option, } @@ -286,7 +286,7 @@ impl HttpExporterBuilder { self.exporter_config.protocol, timeout, compression, - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] self.http_config.retry_policy.take(), )) } @@ -367,7 +367,7 @@ pub(crate) struct OtlpHttpClient { protocol: Protocol, _timeout: Duration, compression: Option, - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] retry_policy: RetryPolicy, #[allow(dead_code)] // would be removed once we support set_resource for metrics and traces. 
@@ -385,7 +385,7 @@ impl OtlpHttpClient { where F: Fn(&Self, T) -> Result<(Vec, &'static str, Option<&'static str>), String>, { - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] { use opentelemetry_sdk::retry::retry_with_backoff; use opentelemetry_sdk::runtime::Tokio; @@ -419,7 +419,7 @@ impl OtlpHttpClient { .map_err(|e| opentelemetry_sdk::error::OTelSdkError::InternalFailure(e.message)) } - #[cfg(not(feature = "http-retry"))] + #[cfg(not(feature = "experimental-http-retry"))] { let (body, content_type, content_encoding) = build_body_fn(self, data) .map_err(opentelemetry_sdk::error::OTelSdkError::InternalFailure)?; @@ -545,7 +545,7 @@ impl OtlpHttpClient { protocol: Protocol, timeout: Duration, compression: Option, - #[cfg(feature = "http-retry")] retry_policy: Option, + #[cfg(feature = "experimental-http-retry")] retry_policy: Option, ) -> Self { OtlpHttpClient { client: Mutex::new(Some(client)), @@ -554,7 +554,7 @@ impl OtlpHttpClient { protocol, _timeout: timeout, compression, - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] retry_policy: retry_policy.unwrap_or(RetryPolicy { max_retries: 3, initial_delay_ms: 100, @@ -733,7 +733,7 @@ pub trait WithHttpConfig { fn with_compression(self, compression: crate::Compression) -> Self; /// Set the retry policy for HTTP requests. - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] fn with_retry_policy(self, policy: RetryPolicy) -> Self; } @@ -760,7 +760,7 @@ impl WithHttpConfig for B { self } - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] fn with_retry_policy(mut self, policy: RetryPolicy) -> Self { self.http_client_config().retry_policy = Some(policy); self @@ -1010,7 +1010,7 @@ mod tests { client: None, headers: Some(initial_headers), compression: None, - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] retry_policy: None, }, exporter_config: crate::ExportConfig::default(), @@ -1078,7 +1078,7 @@ mod tests { crate::Protocol::HttpBinary, std::time::Duration::from_secs(10), Some(crate::Compression::Gzip), - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] None, ); @@ -1111,7 +1111,7 @@ mod tests { crate::Protocol::HttpBinary, std::time::Duration::from_secs(10), Some(crate::Compression::Zstd), - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] None, ); @@ -1141,7 +1141,7 @@ mod tests { crate::Protocol::HttpBinary, std::time::Duration::from_secs(10), None, // No compression - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] None, ); @@ -1164,7 +1164,7 @@ mod tests { crate::Protocol::HttpBinary, std::time::Duration::from_secs(10), Some(crate::Compression::Gzip), - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] None, ); @@ -1188,7 +1188,7 @@ mod tests { crate::Protocol::HttpBinary, std::time::Duration::from_secs(10), Some(crate::Compression::Zstd), - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] None, ); @@ -1252,7 +1252,7 @@ mod tests { protocol, std::time::Duration::from_secs(10), compression, - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] None, ) } @@ -1488,7 +1488,7 @@ mod tests { )); } - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] #[test] fn test_with_retry_policy() { use super::super::HttpExporterBuilder; @@ -1512,11 +1512,11 @@ mod tests { assert_eq!(retry_policy.jitter_ms, 50); } - 
#[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] #[test] fn test_default_retry_policy_when_none_configured() { let client = create_test_client(crate::Protocol::HttpBinary, None); - + // Verify default values are used assert_eq!(client.retry_policy.max_retries, 3); assert_eq!(client.retry_policy.initial_delay_ms, 100); @@ -1524,11 +1524,11 @@ mod tests { assert_eq!(client.retry_policy.jitter_ms, 100); } - #[cfg(feature = "http-retry")] + #[cfg(feature = "experimental-http-retry")] #[test] fn test_custom_retry_policy_used() { use opentelemetry_sdk::retry::RetryPolicy; - + let custom_policy = RetryPolicy { max_retries: 7, initial_delay_ms: 500, @@ -1545,7 +1545,7 @@ mod tests { None, Some(custom_policy), ); - + // Verify custom values are used assert_eq!(client.retry_policy.max_retries, 7); assert_eq!(client.retry_policy.initial_delay_ms, 500); diff --git a/opentelemetry-otlp/src/exporter/tonic/logs.rs b/opentelemetry-otlp/src/exporter/tonic/logs.rs index 650962ba28..071df38389 100644 --- a/opentelemetry-otlp/src/exporter/tonic/logs.rs +++ b/opentelemetry-otlp/src/exporter/tonic/logs.rs @@ -14,8 +14,8 @@ use opentelemetry_proto::transform::logs::tonic::group_logs_by_resource_and_scop use super::BoxInterceptor; -use crate::retry_classification::grpc::classify_tonic_status; -use opentelemetry_sdk::retry::{retry_with_backoff, RetryPolicy}; +use opentelemetry_sdk::retry::RetryPolicy; +#[cfg(feature = "experimental-grpc-retry")] use opentelemetry_sdk::runtime::Tokio; pub(crate) struct TonicLogsClient { @@ -73,10 +73,13 @@ impl LogExporter for TonicLogsClient { async fn export(&self, batch: LogBatch<'_>) -> OTelSdkResult { let batch = Arc::new(batch); - match retry_with_backoff( + match super::tonic_retry_with_backoff( + #[cfg(feature = "experimental-grpc-retry")] Tokio, + #[cfg(not(feature = "experimental-grpc-retry"))] + (), self.retry_policy.clone(), - classify_tonic_status, + crate::retry_classification::grpc::classify_tonic_status, "TonicLogsClient.Export", || async { let batch_clone = Arc::clone(&batch); diff --git a/opentelemetry-otlp/src/exporter/tonic/metrics.rs b/opentelemetry-otlp/src/exporter/tonic/metrics.rs index 223df3fd2f..aaa50bfb39 100644 --- a/opentelemetry-otlp/src/exporter/tonic/metrics.rs +++ b/opentelemetry-otlp/src/exporter/tonic/metrics.rs @@ -12,8 +12,8 @@ use tonic::{codegen::CompressionEncoding, service::Interceptor, transport::Chann use super::BoxInterceptor; use crate::metric::MetricsClient; -use crate::retry_classification::grpc::classify_tonic_status; -use opentelemetry_sdk::retry::{retry_with_backoff, RetryPolicy}; +use opentelemetry_sdk::retry::RetryPolicy; +#[cfg(feature = "experimental-grpc-retry")] use opentelemetry_sdk::runtime::Tokio; pub(crate) struct TonicMetricsClient { @@ -65,10 +65,13 @@ impl TonicMetricsClient { impl MetricsClient for TonicMetricsClient { async fn export(&self, metrics: &ResourceMetrics) -> OTelSdkResult { - match retry_with_backoff( + match super::tonic_retry_with_backoff( + #[cfg(feature = "experimental-grpc-retry")] Tokio, + #[cfg(not(feature = "experimental-grpc-retry"))] + (), self.retry_policy.clone(), - classify_tonic_status, + crate::retry_classification::grpc::classify_tonic_status, "TonicMetricsClient.Export", || async { // Execute the export operation diff --git a/opentelemetry-otlp/src/exporter/tonic/mod.rs b/opentelemetry-otlp/src/exporter/tonic/mod.rs index ee8f11f2a1..0ff72b5970 100644 --- a/opentelemetry-otlp/src/exporter/tonic/mod.rs +++ b/opentelemetry-otlp/src/exporter/tonic/mod.rs @@ -16,8 
+16,23 @@ use super::{resolve_timeout, ExporterBuildError}; use crate::exporter::Compression; use crate::{ExportConfig, OTEL_EXPORTER_OTLP_ENDPOINT, OTEL_EXPORTER_OTLP_HEADERS}; +#[cfg(all( + feature = "experimental-grpc-retry", + any(feature = "trace", feature = "metrics", feature = "logs") +))] +use opentelemetry_sdk::retry::retry_with_backoff; #[cfg(feature = "grpc-tonic")] use opentelemetry_sdk::retry::RetryPolicy; +#[cfg(all( + feature = "experimental-grpc-retry", + any(feature = "trace", feature = "metrics", feature = "logs") +))] +use opentelemetry_sdk::runtime::Runtime; +#[cfg(all( + feature = "grpc-tonic", + any(feature = "trace", feature = "metrics", feature = "logs") +))] +use std::future::Future; #[cfg(feature = "logs")] pub(crate) mod logs; @@ -44,7 +59,7 @@ pub struct TonicConfig { pub(crate) channel: Option, pub(crate) interceptor: Option, /// The retry policy to use for gRPC requests. - #[cfg(feature = "grpc-tonic")] + #[cfg(feature = "experimental-grpc-retry")] pub(crate) retry_policy: Option, } @@ -138,7 +153,7 @@ impl Default for TonicExporterBuilder { compression: None, channel: Option::default(), interceptor: Option::default(), - #[cfg(feature = "grpc-tonic")] + #[cfg(feature = "experimental-grpc-retry")] retry_policy: None, }, exporter_config: ExportConfig { @@ -158,7 +173,15 @@ impl TonicExporterBuilder { signal_timeout_var: &str, signal_compression_var: &str, signal_headers_var: &str, - ) -> Result<(Channel, BoxInterceptor, Option, Option), ExporterBuildError> { + ) -> Result< + ( + Channel, + BoxInterceptor, + Option, + Option, + ), + ExporterBuildError, + > { let compression = self.resolve_compression(signal_compression_var)?; let (headers_from_env, headers_for_logging) = parse_headers_from_env(signal_headers_var); @@ -190,14 +213,20 @@ impl TonicExporterBuilder { }; // Get retry policy before consuming self - #[cfg(feature = "grpc-tonic")] + #[cfg(feature = "experimental-grpc-retry")] let retry_policy = self.tonic_config.retry_policy.clone(); - + // If a custom channel was provided, use that channel instead of creating one if let Some(channel) = self.tonic_config.channel { - return Ok((channel, interceptor, compression, - #[cfg(feature = "grpc-tonic")] retry_policy, - #[cfg(not(feature = "grpc-tonic"))] None)); + return Ok(( + channel, + interceptor, + compression, + #[cfg(feature = "experimental-grpc-retry")] + retry_policy, + #[cfg(not(feature = "experimental-grpc-retry"))] + None, + )); } let config = self.exporter_config; @@ -225,9 +254,15 @@ impl TonicExporterBuilder { let channel = endpoint.timeout(timeout).connect_lazy(); otel_debug!(name: "TonicChannelBuilt", endpoint = endpoint_clone, timeout_in_millisecs = timeout.as_millis(), compression = format!("{:?}", compression), headers = format!("{:?}", headers_for_logging)); - Ok((channel, interceptor, compression, - #[cfg(feature = "grpc-tonic")] retry_policy, - #[cfg(not(feature = "grpc-tonic"))] None)) + Ok(( + channel, + interceptor, + compression, + #[cfg(feature = "experimental-grpc-retry")] + retry_policy, + #[cfg(not(feature = "experimental-grpc-retry"))] + None, + )) } fn resolve_endpoint(default_endpoint_var: &str, provided_endpoint: Option) -> String { @@ -320,6 +355,49 @@ impl TonicExporterBuilder { } } +/// Wrapper for retry functionality in tonic exporters. +/// Provides a unified call path that either uses retry_with_backoff when experimental-grpc-retry +/// feature is enabled, or executes the operation once when it's not. 
+#[cfg(all( + feature = "grpc-tonic", + feature = "experimental-grpc-retry", + any(feature = "trace", feature = "metrics", feature = "logs") +))] +async fn tonic_retry_with_backoff( + runtime: R, + policy: RetryPolicy, + classify_fn: fn(&tonic::Status) -> opentelemetry_sdk::retry::RetryErrorType, + operation_name: &'static str, + operation: F, +) -> Result +where + R: Runtime, + F: Fn() -> Fut, + Fut: Future>, +{ + retry_with_backoff(runtime, policy, classify_fn, operation_name, operation).await +} + +/// Provides a unified call path when experimental-grpc-retry is not enabled - just executes the operation once. +#[cfg(all( + feature = "grpc-tonic", + not(feature = "experimental-grpc-retry"), + any(feature = "trace", feature = "metrics", feature = "logs") +))] +async fn tonic_retry_with_backoff( + _runtime: (), + _policy: RetryPolicy, + _classify_fn: fn(&tonic::Status) -> opentelemetry_sdk::retry::RetryErrorType, + _operation_name: &'static str, + operation: F, +) -> Result +where + F: Fn() -> Fut, + Fut: Future>, +{ + operation().await +} + fn merge_metadata_with_headers_from_env( metadata: MetadataMap, headers_from_env: HeaderMap, @@ -493,7 +571,7 @@ pub trait WithTonicConfig { I: tonic::service::Interceptor + Clone + Send + Sync + 'static; /// Set the retry policy for gRPC requests. - #[cfg(feature = "grpc-tonic")] + #[cfg(feature = "experimental-grpc-retry")] fn with_retry_policy(self, policy: RetryPolicy) -> Self; } @@ -537,7 +615,7 @@ impl WithTonicConfig for B { self } - #[cfg(feature = "grpc-tonic")] + #[cfg(feature = "experimental-grpc-retry")] fn with_retry_policy(mut self, policy: RetryPolicy) -> Self { self.tonic_config().retry_policy = Some(policy); self @@ -775,7 +853,7 @@ mod tests { }); } - #[cfg(feature = "grpc-tonic")] + #[cfg(feature = "experimental-grpc-retry")] #[test] fn test_with_retry_policy() { use crate::WithTonicConfig; @@ -798,7 +876,7 @@ mod tests { assert_eq!(retry_policy.jitter_ms, 50); } - #[cfg(feature = "grpc-tonic")] + #[cfg(feature = "experimental-grpc-retry")] #[test] fn test_default_retry_policy_when_none_configured() { // This test requires us to create a tonic client, but we can't easily do that without diff --git a/opentelemetry-otlp/src/exporter/tonic/trace.rs b/opentelemetry-otlp/src/exporter/tonic/trace.rs index 5e32660998..c426ff7fcc 100644 --- a/opentelemetry-otlp/src/exporter/tonic/trace.rs +++ b/opentelemetry-otlp/src/exporter/tonic/trace.rs @@ -16,8 +16,8 @@ use tonic::{codegen::CompressionEncoding, service::Interceptor, transport::Chann use super::BoxInterceptor; -use crate::retry_classification::grpc::classify_tonic_status; -use opentelemetry_sdk::retry::{retry_with_backoff, RetryPolicy}; +use opentelemetry_sdk::retry::RetryPolicy; +#[cfg(feature = "experimental-grpc-retry")] use opentelemetry_sdk::runtime::Tokio; pub(crate) struct TonicTracesClient { @@ -75,10 +75,13 @@ impl SpanExporter for TonicTracesClient { async fn export(&self, batch: Vec) -> OTelSdkResult { let batch = Arc::new(batch); - match retry_with_backoff( + match super::tonic_retry_with_backoff( + #[cfg(feature = "experimental-grpc-retry")] Tokio, + #[cfg(not(feature = "experimental-grpc-retry"))] + (), self.retry_policy.clone(), - classify_tonic_status, + crate::retry_classification::grpc::classify_tonic_status, "TonicTracesClient.Export", || async { let batch_clone = Arc::clone(&batch); diff --git a/opentelemetry-otlp/src/lib.rs b/opentelemetry-otlp/src/lib.rs index ac32e38732..1c178e79fd 100644 --- a/opentelemetry-otlp/src/lib.rs +++ b/opentelemetry-otlp/src/lib.rs @@ 
-366,7 +366,7 @@ mod metric; #[cfg(any(feature = "http-proto", feature = "http-json", feature = "grpc-tonic"))] mod span; -#[cfg(any(feature = "grpc-tonic", feature = "http-retry"))] +#[cfg(any(feature = "grpc-tonic", feature = "experimental-http-retry"))] pub mod retry_classification; pub use crate::exporter::Compression; @@ -409,7 +409,7 @@ pub use crate::exporter::{ OTEL_EXPORTER_OTLP_TIMEOUT_DEFAULT, }; -#[cfg(feature = "http-retry")] +#[cfg(feature = "experimental-http-retry")] pub use opentelemetry_sdk::retry::RetryPolicy; /// Type to indicate the builder does not have a client set. From 9bf0161ddd86f86ea00345112cb70bf7e0646586 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Fri, 5 Sep 2025 13:05:17 +0200 Subject: [PATCH 22/27] chore: fix missing dep --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index 010ce42b48..5fbac9ddf4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,6 +44,7 @@ thiserror = { version = "2", default-features = false } tonic = { version = "0.14.1", default-features = false } tonic-prost-build = "0.14.1" tonic-prost = "0.14.1" +tonic-types = "0.14.1" tokio = { version = "1", default-features = false } tokio-stream = "0.1" # Using `tracing 0.1.40` because 0.1.39 (which is yanked) introduces the ability to set event names in macros, From d449457809b4db1c125978629731420b3f9a63d8 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Mon, 6 Oct 2025 09:21:24 +0200 Subject: [PATCH 23/27] chore: move retry from sdk down to otlp for now --- opentelemetry-otlp/src/exporter/http/mod.rs | 8 +++--- opentelemetry-otlp/src/exporter/tonic/logs.rs | 2 +- .../src/exporter/tonic/metrics.rs | 2 +- opentelemetry-otlp/src/exporter/tonic/mod.rs | 10 +++---- .../src/exporter/tonic/trace.rs | 2 +- opentelemetry-otlp/src/lib.rs | 6 +++- .../src/retry.rs | 28 +++++++++++-------- .../src/retry_classification.rs | 4 +-- opentelemetry-sdk/src/lib.rs | 2 -- 9 files changed, 35 insertions(+), 29 deletions(-) rename {opentelemetry-sdk => opentelemetry-otlp}/src/retry.rs (94%) diff --git a/opentelemetry-otlp/src/exporter/http/mod.rs b/opentelemetry-otlp/src/exporter/http/mod.rs index 0f57a0292e..65e702c40f 100644 --- a/opentelemetry-otlp/src/exporter/http/mod.rs +++ b/opentelemetry-otlp/src/exporter/http/mod.rs @@ -25,7 +25,7 @@ use std::time::Duration; #[cfg(feature = "experimental-http-retry")] use crate::retry_classification::http::classify_http_error; #[cfg(feature = "experimental-http-retry")] -use opentelemetry_sdk::retry::{RetryErrorType, RetryPolicy}; +use crate::retry::{RetryErrorType, RetryPolicy}; // Shared HTTP retry functionality /// HTTP-specific error wrapper for retry classification @@ -387,7 +387,7 @@ impl OtlpHttpClient { { #[cfg(feature = "experimental-http-retry")] { - use opentelemetry_sdk::retry::retry_with_backoff; + use crate::retry::retry_with_backoff; use opentelemetry_sdk::runtime::Tokio; // Build request body once before retry loop @@ -1493,7 +1493,7 @@ mod tests { fn test_with_retry_policy() { use super::super::HttpExporterBuilder; use crate::WithHttpConfig; - use opentelemetry_sdk::retry::RetryPolicy; + use crate::retry::RetryPolicy; let custom_policy = RetryPolicy { max_retries: 5, @@ -1527,7 +1527,7 @@ mod tests { #[cfg(feature = "experimental-http-retry")] #[test] fn test_custom_retry_policy_used() { - use opentelemetry_sdk::retry::RetryPolicy; + use crate::retry::RetryPolicy; let custom_policy = RetryPolicy { max_retries: 7, diff --git a/opentelemetry-otlp/src/exporter/tonic/logs.rs 
b/opentelemetry-otlp/src/exporter/tonic/logs.rs index 071df38389..679e211a3d 100644 --- a/opentelemetry-otlp/src/exporter/tonic/logs.rs +++ b/opentelemetry-otlp/src/exporter/tonic/logs.rs @@ -14,7 +14,7 @@ use opentelemetry_proto::transform::logs::tonic::group_logs_by_resource_and_scop use super::BoxInterceptor; -use opentelemetry_sdk::retry::RetryPolicy; +use crate::retry::RetryPolicy; #[cfg(feature = "experimental-grpc-retry")] use opentelemetry_sdk::runtime::Tokio; diff --git a/opentelemetry-otlp/src/exporter/tonic/metrics.rs b/opentelemetry-otlp/src/exporter/tonic/metrics.rs index aaa50bfb39..58470f9d30 100644 --- a/opentelemetry-otlp/src/exporter/tonic/metrics.rs +++ b/opentelemetry-otlp/src/exporter/tonic/metrics.rs @@ -12,7 +12,7 @@ use tonic::{codegen::CompressionEncoding, service::Interceptor, transport::Chann use super::BoxInterceptor; use crate::metric::MetricsClient; -use opentelemetry_sdk::retry::RetryPolicy; +use crate::retry::RetryPolicy; #[cfg(feature = "experimental-grpc-retry")] use opentelemetry_sdk::runtime::Tokio; diff --git a/opentelemetry-otlp/src/exporter/tonic/mod.rs b/opentelemetry-otlp/src/exporter/tonic/mod.rs index 0ff72b5970..d99e298c06 100644 --- a/opentelemetry-otlp/src/exporter/tonic/mod.rs +++ b/opentelemetry-otlp/src/exporter/tonic/mod.rs @@ -20,9 +20,9 @@ use crate::{ExportConfig, OTEL_EXPORTER_OTLP_ENDPOINT, OTEL_EXPORTER_OTLP_HEADER feature = "experimental-grpc-retry", any(feature = "trace", feature = "metrics", feature = "logs") ))] -use opentelemetry_sdk::retry::retry_with_backoff; +use crate::retry::retry_with_backoff; #[cfg(feature = "grpc-tonic")] -use opentelemetry_sdk::retry::RetryPolicy; +use crate::retry::RetryPolicy; #[cfg(all( feature = "experimental-grpc-retry", any(feature = "trace", feature = "metrics", feature = "logs") @@ -366,7 +366,7 @@ impl TonicExporterBuilder { async fn tonic_retry_with_backoff( runtime: R, policy: RetryPolicy, - classify_fn: fn(&tonic::Status) -> opentelemetry_sdk::retry::RetryErrorType, + classify_fn: fn(&tonic::Status) -> crate::retry::RetryErrorType, operation_name: &'static str, operation: F, ) -> Result @@ -387,7 +387,7 @@ where async fn tonic_retry_with_backoff( _runtime: (), _policy: RetryPolicy, - _classify_fn: fn(&tonic::Status) -> opentelemetry_sdk::retry::RetryErrorType, + _classify_fn: fn(&tonic::Status) -> crate::retry::RetryErrorType, _operation_name: &'static str, operation: F, ) -> Result @@ -857,7 +857,7 @@ mod tests { #[test] fn test_with_retry_policy() { use crate::WithTonicConfig; - use opentelemetry_sdk::retry::RetryPolicy; + use crate::retry::RetryPolicy; let custom_policy = RetryPolicy { max_retries: 5, diff --git a/opentelemetry-otlp/src/exporter/tonic/trace.rs b/opentelemetry-otlp/src/exporter/tonic/trace.rs index c426ff7fcc..cfead52e58 100644 --- a/opentelemetry-otlp/src/exporter/tonic/trace.rs +++ b/opentelemetry-otlp/src/exporter/tonic/trace.rs @@ -16,7 +16,7 @@ use tonic::{codegen::CompressionEncoding, service::Interceptor, transport::Chann use super::BoxInterceptor; -use opentelemetry_sdk::retry::RetryPolicy; +use crate::retry::RetryPolicy; #[cfg(feature = "experimental-grpc-retry")] use opentelemetry_sdk::runtime::Tokio; diff --git a/opentelemetry-otlp/src/lib.rs b/opentelemetry-otlp/src/lib.rs index 1c178e79fd..7bcc535939 100644 --- a/opentelemetry-otlp/src/lib.rs +++ b/opentelemetry-otlp/src/lib.rs @@ -369,6 +369,10 @@ mod span; #[cfg(any(feature = "grpc-tonic", feature = "experimental-http-retry"))] pub mod retry_classification; +/// Retry logic for exporting telemetry data. 
+#[cfg(any(feature = "grpc-tonic", feature = "experimental-http-retry"))] +pub mod retry; + pub use crate::exporter::Compression; pub use crate::exporter::ExportConfig; pub use crate::exporter::ExporterBuildError; @@ -410,7 +414,7 @@ pub use crate::exporter::{ }; #[cfg(feature = "experimental-http-retry")] -pub use opentelemetry_sdk::retry::RetryPolicy; +pub use retry::RetryPolicy; /// Type to indicate the builder does not have a client set. #[derive(Debug, Default, Clone)] diff --git a/opentelemetry-sdk/src/retry.rs b/opentelemetry-otlp/src/retry.rs similarity index 94% rename from opentelemetry-sdk/src/retry.rs rename to opentelemetry-otlp/src/retry.rs index 17d1b183b0..23f8327c4d 100644 --- a/opentelemetry-sdk/src/retry.rs +++ b/opentelemetry-otlp/src/retry.rs @@ -8,16 +8,17 @@ //! retries. The function uses error classification to determine retry behavior and can honor //! server-provided throttling hints. -#[cfg(feature = "experimental_async_runtime")] +#[cfg(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry"))] use opentelemetry::otel_warn; -#[cfg(feature = "experimental_async_runtime")] +#[cfg(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry"))] use std::future::Future; +use std::hash::DefaultHasher; use std::time::Duration; -#[cfg(feature = "experimental_async_runtime")] +#[cfg(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry"))] use std::time::SystemTime; -#[cfg(feature = "experimental_async_runtime")] -use crate::runtime::Runtime; +#[cfg(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry"))] +use opentelemetry_sdk::runtime::Runtime; /// Classification of errors for retry purposes. #[derive(Debug, Clone, PartialEq)] @@ -46,11 +47,11 @@ pub struct RetryPolicy { /// A runtime stub for when experimental_async_runtime is not enabled. /// This allows retry policy to be configured but no actual retries occur. -#[cfg(not(feature = "experimental_async_runtime"))] +#[cfg(not(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry")))] #[derive(Debug, Clone, Default)] pub struct NoOpRuntime; -#[cfg(not(feature = "experimental_async_runtime"))] +#[cfg(not(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry")))] impl NoOpRuntime { /// Creates a new no-op runtime. pub fn new() -> Self { @@ -59,13 +60,16 @@ impl NoOpRuntime { } // Generates a random jitter value up to max_jitter -#[cfg(feature = "experimental_async_runtime")] +#[cfg(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry"))] fn generate_jitter(max_jitter: u64) -> u64 { let now = SystemTime::now(); let nanos = now .duration_since(SystemTime::UNIX_EPOCH) .unwrap() .subsec_nanos(); + + let hasher = DefaultHasher::default(); + nanos as u64 % (max_jitter + 1) } @@ -86,7 +90,7 @@ fn generate_jitter(max_jitter: u64) -> u64 { /// /// A `Result` containing the operation's result or an error if max retries are reached /// or a non-retryable error occurs. -#[cfg(feature = "experimental_async_runtime")] +#[cfg(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry"))] pub async fn retry_with_backoff( runtime: R, policy: RetryPolicy, @@ -147,7 +151,7 @@ where /// No-op retry function for when experimental_async_runtime is not enabled. /// This function will execute the operation exactly once without any retries. 
-#[cfg(not(feature = "experimental_async_runtime"))] +#[cfg(not(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry")))] pub async fn retry_with_backoff( _runtime: R, _policy: RetryPolicy, @@ -163,10 +167,10 @@ where operation().await } -#[cfg(all(test, feature = "experimental_async_runtime", feature = "rt-tokio"))] +#[cfg(all(test, feature = "experimental-grpc-retry"))] mod tests { use super::*; - use crate::runtime::Tokio; + use opentelemetry_sdk::runtime::Tokio; use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Duration; use tokio::time::timeout; diff --git a/opentelemetry-otlp/src/retry_classification.rs b/opentelemetry-otlp/src/retry_classification.rs index 06a8d914ad..ca0f47100a 100644 --- a/opentelemetry-otlp/src/retry_classification.rs +++ b/opentelemetry-otlp/src/retry_classification.rs @@ -4,7 +4,7 @@ //! supporting server-provided throttling hints like HTTP Retry-After headers and //! gRPC RetryInfo metadata. -use opentelemetry_sdk::retry::RetryErrorType; +use crate::retry::RetryErrorType; use std::time::Duration; #[cfg(feature = "grpc-tonic")] @@ -222,7 +222,7 @@ mod tests { #[cfg(feature = "grpc-tonic")] mod grpc_tests { use crate::retry_classification::grpc::classify_tonic_status; - use opentelemetry_sdk::retry::RetryErrorType; + use crate::retry::RetryErrorType; use tonic_types::{ErrorDetails, StatusExt}; #[test] diff --git a/opentelemetry-sdk/src/lib.rs b/opentelemetry-sdk/src/lib.rs index 1143e333f8..893b5ab97d 100644 --- a/opentelemetry-sdk/src/lib.rs +++ b/opentelemetry-sdk/src/lib.rs @@ -168,5 +168,3 @@ impl From> for InMemoryExporterError { } } -/// Retry logic for exporting telemetry data. -pub mod retry; From 661bbe3c406db532580f5300f3aa9b052fbbb00b Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Mon, 6 Oct 2025 09:42:32 +0200 Subject: [PATCH 24/27] chore: use DefaultHasher to spread out nano time jitter --- opentelemetry-otlp/src/exporter/http/mod.rs | 6 +- opentelemetry-otlp/src/exporter/tonic/mod.rs | 2 +- opentelemetry-otlp/src/retry.rs | 60 ++++++++++++++----- .../src/retry_classification.rs | 2 +- opentelemetry-sdk/src/lib.rs | 1 - 5 files changed, 50 insertions(+), 21 deletions(-) diff --git a/opentelemetry-otlp/src/exporter/http/mod.rs b/opentelemetry-otlp/src/exporter/http/mod.rs index 65e702c40f..b6336e37ae 100644 --- a/opentelemetry-otlp/src/exporter/http/mod.rs +++ b/opentelemetry-otlp/src/exporter/http/mod.rs @@ -22,10 +22,10 @@ use std::str::FromStr; use std::sync::{Arc, Mutex}; use std::time::Duration; -#[cfg(feature = "experimental-http-retry")] -use crate::retry_classification::http::classify_http_error; #[cfg(feature = "experimental-http-retry")] use crate::retry::{RetryErrorType, RetryPolicy}; +#[cfg(feature = "experimental-http-retry")] +use crate::retry_classification::http::classify_http_error; // Shared HTTP retry functionality /// HTTP-specific error wrapper for retry classification @@ -1492,8 +1492,8 @@ mod tests { #[test] fn test_with_retry_policy() { use super::super::HttpExporterBuilder; - use crate::WithHttpConfig; use crate::retry::RetryPolicy; + use crate::WithHttpConfig; let custom_policy = RetryPolicy { max_retries: 5, diff --git a/opentelemetry-otlp/src/exporter/tonic/mod.rs b/opentelemetry-otlp/src/exporter/tonic/mod.rs index d99e298c06..15c098f60b 100644 --- a/opentelemetry-otlp/src/exporter/tonic/mod.rs +++ b/opentelemetry-otlp/src/exporter/tonic/mod.rs @@ -856,8 +856,8 @@ mod tests { #[cfg(feature = "experimental-grpc-retry")] #[test] fn test_with_retry_policy() { - use 
crate::WithTonicConfig; use crate::retry::RetryPolicy; + use crate::WithTonicConfig; let custom_policy = RetryPolicy { max_retries: 5, diff --git a/opentelemetry-otlp/src/retry.rs b/opentelemetry-otlp/src/retry.rs index 23f8327c4d..939e44503b 100644 --- a/opentelemetry-otlp/src/retry.rs +++ b/opentelemetry-otlp/src/retry.rs @@ -8,16 +8,32 @@ //! retries. The function uses error classification to determine retry behavior and can honor //! server-provided throttling hints. -#[cfg(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry"))] +#[cfg(any( + feature = "experimental-grpc-retry", + feature = "experimental-http-retry" +))] use opentelemetry::otel_warn; -#[cfg(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry"))] +#[cfg(any( + feature = "experimental-grpc-retry", + feature = "experimental-http-retry" +))] use std::future::Future; -use std::hash::DefaultHasher; +#[cfg(any( + feature = "experimental-grpc-retry", + feature = "experimental-http-retry" +))] +use std::hash::{DefaultHasher, Hasher}; use std::time::Duration; -#[cfg(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry"))] +#[cfg(any( + feature = "experimental-grpc-retry", + feature = "experimental-http-retry" +))] use std::time::SystemTime; -#[cfg(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry"))] +#[cfg(any( + feature = "experimental-grpc-retry", + feature = "experimental-http-retry" +))] use opentelemetry_sdk::runtime::Runtime; /// Classification of errors for retry purposes. @@ -47,11 +63,17 @@ pub struct RetryPolicy { /// A runtime stub for when experimental_async_runtime is not enabled. /// This allows retry policy to be configured but no actual retries occur. -#[cfg(not(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry")))] +#[cfg(not(any( + feature = "experimental-grpc-retry", + feature = "experimental-http-retry" +)))] #[derive(Debug, Clone, Default)] pub struct NoOpRuntime; -#[cfg(not(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry")))] +#[cfg(not(any( + feature = "experimental-grpc-retry", + feature = "experimental-http-retry" +)))] impl NoOpRuntime { /// Creates a new no-op runtime. pub fn new() -> Self { @@ -60,17 +82,19 @@ impl NoOpRuntime { } // Generates a random jitter value up to max_jitter -#[cfg(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry"))] +#[cfg(any( + feature = "experimental-grpc-retry", + feature = "experimental-http-retry" +))] fn generate_jitter(max_jitter: u64) -> u64 { - let now = SystemTime::now(); - let nanos = now + let nanos = SystemTime::now() .duration_since(SystemTime::UNIX_EPOCH) .unwrap() .subsec_nanos(); - let hasher = DefaultHasher::default(); - - nanos as u64 % (max_jitter + 1) + let mut hasher = DefaultHasher::default(); + hasher.write_u32(nanos); + hasher.finish() % (max_jitter + 1) } /// Retries the given operation with exponential backoff, jitter, and error classification. @@ -90,7 +114,10 @@ fn generate_jitter(max_jitter: u64) -> u64 { /// /// A `Result` containing the operation's result or an error if max retries are reached /// or a non-retryable error occurs. 
-#[cfg(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry"))] +#[cfg(any( + feature = "experimental-grpc-retry", + feature = "experimental-http-retry" +))] pub async fn retry_with_backoff( runtime: R, policy: RetryPolicy, @@ -151,7 +178,10 @@ where /// No-op retry function for when experimental_async_runtime is not enabled. /// This function will execute the operation exactly once without any retries. -#[cfg(not(any(feature = "experimental-grpc-retry", feature = "experimental-http-retry")))] +#[cfg(not(any( + feature = "experimental-grpc-retry", + feature = "experimental-http-retry" +)))] pub async fn retry_with_backoff( _runtime: R, _policy: RetryPolicy, diff --git a/opentelemetry-otlp/src/retry_classification.rs b/opentelemetry-otlp/src/retry_classification.rs index ca0f47100a..a69757d5de 100644 --- a/opentelemetry-otlp/src/retry_classification.rs +++ b/opentelemetry-otlp/src/retry_classification.rs @@ -221,8 +221,8 @@ mod tests { // Tests for gRPC error classification using public interface #[cfg(feature = "grpc-tonic")] mod grpc_tests { - use crate::retry_classification::grpc::classify_tonic_status; use crate::retry::RetryErrorType; + use crate::retry_classification::grpc::classify_tonic_status; use tonic_types::{ErrorDetails, StatusExt}; #[test] diff --git a/opentelemetry-sdk/src/lib.rs b/opentelemetry-sdk/src/lib.rs index 893b5ab97d..8f8e5a8653 100644 --- a/opentelemetry-sdk/src/lib.rs +++ b/opentelemetry-sdk/src/lib.rs @@ -167,4 +167,3 @@ impl From> for InMemoryExporterError { InMemoryExporterError::InternalFailure(format!("Mutex poison error: {err}")) } } - From 4e29dfaf898bb49c19ee0a204bc35669312a1fb9 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Mon, 6 Oct 2025 09:49:41 +0200 Subject: [PATCH 25/27] chore: impl date parsing --- opentelemetry-otlp/Cargo.toml | 3 +- .../src/retry_classification.rs | 57 ++++++++++++++++--- 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/opentelemetry-otlp/Cargo.toml b/opentelemetry-otlp/Cargo.toml index 95b8c27166..5a030ecaff 100644 --- a/opentelemetry-otlp/Cargo.toml +++ b/opentelemetry-otlp/Cargo.toml @@ -40,6 +40,7 @@ tokio = { workspace = true, features = ["sync", "rt"], optional = true } reqwest = { workspace = true, optional = true } http = { workspace = true, optional = true } +httpdate = { version = "1.0.3", optional = true } serde = { workspace = true, features = ["derive"], optional = true } thiserror = { workspace = true } serde_json = { workspace = true, optional = true } @@ -88,7 +89,7 @@ tls-webpki-roots = ["tls", "tonic/tls-webpki-roots"] http-proto = ["prost", "opentelemetry-http", "opentelemetry-proto/gen-tonic-messages", "http", "trace", "metrics"] # http with retry support. -experimental-http-retry = ["opentelemetry_sdk/experimental_async_runtime", "opentelemetry_sdk/rt-tokio", "tokio"] +experimental-http-retry = ["opentelemetry_sdk/experimental_async_runtime", "opentelemetry_sdk/rt-tokio", "tokio", "httpdate"] http-json = ["serde_json", "prost", "opentelemetry-http", "opentelemetry-proto/gen-tonic-messages", "opentelemetry-proto/with-serde", "http", "trace", "metrics"] reqwest-blocking-client = ["reqwest/blocking", "opentelemetry-http/reqwest-blocking"] diff --git a/opentelemetry-otlp/src/retry_classification.rs b/opentelemetry-otlp/src/retry_classification.rs index a69757d5de..c65a7685d4 100644 --- a/opentelemetry-otlp/src/retry_classification.rs +++ b/opentelemetry-otlp/src/retry_classification.rs @@ -5,7 +5,6 @@ //! gRPC RetryInfo metadata. 
use crate::retry::RetryErrorType; -use std::time::Duration; #[cfg(feature = "grpc-tonic")] use tonic; @@ -14,8 +13,10 @@ use tonic; use tonic_types::StatusExt; /// HTTP-specific error classification with Retry-After header support. +#[cfg(feature = "experimental-http-retry")] pub mod http { use super::*; + use std::time::Duration; /// Classifies HTTP errors based on status code and headers. /// @@ -76,14 +77,17 @@ pub mod http { } /// Parses HTTP date format and returns delay in seconds from now. - /// - /// This is a simplified parser for the most common HTTP date format. - /// TODO - should we use a library here? fn parse_http_date_to_delay(date_str: &str) -> Result { - // For now, return error - would need proper HTTP date parsing - // This could be implemented with chrono or similar - let _ = date_str; - Err(()) + use std::time::SystemTime; + + // Try parse the date; if we fail, propagate an () error up to the caller. + let target_time = httpdate::parse_http_date(date_str).map_err(|_| ())?; + + let now = SystemTime::now(); + let delay = target_time + .duration_since(now) + .unwrap_or(std::time::Duration::ZERO); + Ok(delay.as_secs()) } } @@ -160,6 +164,7 @@ pub mod grpc { #[cfg(test)] mod tests { use super::*; + use std::time::Duration; // Tests for HTTP error classification mod http_tests { @@ -216,6 +221,42 @@ mod tests { assert_eq!(classify_http_error(200, None), RetryErrorType::Retryable); assert_eq!(classify_http_error(300, None), RetryErrorType::Retryable); } + + #[test] + #[cfg(feature = "experimental-http-retry")] + fn test_http_429_with_retry_after_valid_date() { + use std::time::SystemTime; + + // Create a time 30 seconds in the future + let future_time = SystemTime::now() + Duration::from_secs(30); + let date_str = httpdate::fmt_http_date(future_time); + let result = classify_http_error(429, Some(&date_str)); + match result { + RetryErrorType::Throttled(duration) => { + let secs = duration.as_secs(); + assert!( + (29..=30).contains(&secs), + "Expected ~30 seconds, got {}", + secs + ); + } + _ => panic!("Expected Throttled, got {:?}", result), + } + } + + #[test] + #[cfg(feature = "experimental-http-retry")] + fn test_http_429_with_retry_after_invalid_date() { + let result = classify_http_error(429, Some("Not a valid date")); + assert_eq!(result, RetryErrorType::Retryable); // Falls back to retryable + } + + #[test] + #[cfg(feature = "experimental-http-retry")] + fn test_http_429_with_retry_after_malformed_date() { + let result = classify_http_error(429, Some("Sun, 99 Nov 9999 99:99:99 GMT")); + assert_eq!(result, RetryErrorType::Retryable); // Falls back to retryable + } } // Tests for gRPC error classification using public interface From e49c4a70bcd98e0db7515f5077dfeda9640f53a7 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Mon, 6 Oct 2025 09:49:41 +0200 Subject: [PATCH 26/27] feat: add a 'no-async' runtime --- opentelemetry-sdk/src/runtime.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/opentelemetry-sdk/src/runtime.rs b/opentelemetry-sdk/src/runtime.rs index f82ac5943a..8ae9c61df7 100644 --- a/opentelemetry-sdk/src/runtime.rs +++ b/opentelemetry-sdk/src/runtime.rs @@ -245,3 +245,31 @@ impl RuntimeChannel for TokioCurrentThread { ) } } + +/// Runtime implementation for synchronous execution environments. +/// +/// This runtime can be used when executing in a non-async environment. +/// The runtime methods will perform their operations synchronously. 
+#[cfg(feature = "experimental_async_runtime")] +#[derive(Debug, Clone, Copy)] +pub struct NoAsync; + +#[cfg(feature = "experimental_async_runtime")] +impl Runtime for NoAsync { + fn spawn(&self, future: F) + where + F: Future + Send + 'static, + { + std::thread::spawn(move || { + futures_executor::block_on(future); + }); + } + + // Needed because async fn would borrow `self`, violating the `'static` requirement. + #[allow(clippy::manual_async_fn)] + fn delay(&self, duration: Duration) -> impl Future + Send + 'static { + async move { + std::thread::sleep(duration); + } + } +} From 9ba3a06df952d4888009764bebe665281f9e45b7 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Wed, 8 Oct 2025 11:11:35 +0100 Subject: [PATCH 27/27] chore: select correct runtime --- opentelemetry-otlp/src/exporter/http/mod.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/opentelemetry-otlp/src/exporter/http/mod.rs b/opentelemetry-otlp/src/exporter/http/mod.rs index b6336e37ae..d4e8938983 100644 --- a/opentelemetry-otlp/src/exporter/http/mod.rs +++ b/opentelemetry-otlp/src/exporter/http/mod.rs @@ -388,7 +388,6 @@ impl OtlpHttpClient { #[cfg(feature = "experimental-http-retry")] { use crate::retry::retry_with_backoff; - use opentelemetry_sdk::runtime::Tokio; // Build request body once before retry loop let (body, content_type, content_encoding) = build_body_fn(self, data) @@ -400,8 +399,17 @@ impl OtlpHttpClient { endpoint: self.collector_endpoint.to_string(), }); + // Select runtime based on HTTP client feature - if we're using + // one without Tokio, we don't need or want the Tokio async blocking + // behaviour. + #[cfg(feature = "reqwest-blocking-client")] + let runtime = opentelemetry_sdk::runtime::NoAsync; + + #[cfg(not(feature = "reqwest-blocking-client"))] + let runtime = opentelemetry_sdk::runtime::Tokio; + retry_with_backoff( - Tokio, + runtime, self.retry_policy.clone(), classify_http_export_error, operation_name,