
Commit 7321905

add non-retryable error, shutdown helpers, additional tests
Signed-off-by: Filinto Duran <1373693+filintod@users.noreply.github.com>
1 parent 7f89f6a commit 7321905

13 files changed: +871 −173 lines changed

README.md

Lines changed: 54 additions & 2 deletions
@@ -126,10 +126,62 @@ Orchestrations can be continued as new using the `continue_as_new` API. This API

Orchestrations can be suspended using the `suspend_orchestration` client API and will remain suspended until resumed using the `resume_orchestration` client API. A suspended orchestration will stop processing new events, but will continue to buffer any that happen to arrive until resumed, ensuring that no data is lost. An orchestration can also be terminated using the `terminate_orchestration` client API. Terminated orchestrations will stop processing new events and will discard any buffered events.

-### Retry policies (TODO)
+### Retry policies

Orchestrations can specify retry policies for activities and sub-orchestrations. These policies control how many times and how frequently an activity or sub-orchestration will be retried in the event of a transient error.

+#### Creating a retry policy
+
+```python
+from datetime import timedelta
+from durabletask import task
+
+retry_policy = task.RetryPolicy(
+    first_retry_interval=timedelta(seconds=1),  # Initial delay before first retry
+    max_number_of_attempts=5,  # Maximum total attempts (includes first attempt)
+    backoff_coefficient=2.0,  # Exponential backoff multiplier (must be >= 1)
+    max_retry_interval=timedelta(seconds=30),  # Cap on retry delay
+    retry_timeout=timedelta(minutes=5),  # Total time limit for all retries (optional)
+)
+```
+
+**Notes:**
+- `max_number_of_attempts` **includes the initial attempt**. For example, `max_number_of_attempts=5` means 1 initial attempt + up to 4 retries.
+- `retry_timeout` is optional. If omitted or set to `None`, retries continue until `max_number_of_attempts` is reached.
+- `backoff_coefficient` controls exponential backoff: delay = `first_retry_interval * (backoff_coefficient ^ retry_number)`, capped by `max_retry_interval`.
+- `non_retryable_error_types` (optional) can specify additional exception types to treat as non-retryable (e.g., `[ValueError, TypeError]`). `NonRetryableError` is always non-retryable regardless of this setting.
+
+#### Using retry policies
+
+Apply retry policies to activities or sub-orchestrations:
+
+```python
+def my_orchestrator(ctx: task.OrchestrationContext, input):
+    # Retry an activity
+    result = yield ctx.call_activity(my_activity, input=data, retry_policy=retry_policy)
+
+    # Retry a sub-orchestration
+    result = yield ctx.call_sub_orchestrator(child_orchestrator, input=data, retry_policy=retry_policy)
+```
+
+#### Non-retryable errors
+
+For errors that should not be retried (e.g., validation failures, permanent errors), raise a `NonRetryableError`:
+
+```python
+from durabletask.task import NonRetryableError
+
+def my_activity(ctx: task.ActivityContext, input):
+    if input is None:
+        # This error will bypass retry logic and fail immediately
+        raise NonRetryableError("Input cannot be None")
+
+    # Transient errors (network, timeouts, etc.) will be retried
+    return call_external_service(input)
+```
+
+Even with a retry policy configured, `NonRetryableError` will fail immediately without retrying.
+
## Getting Started

### Prerequisites
@@ -194,7 +246,7 @@ Certain aspects like multi-app activities require the full dapr runtime to be ru
```shell
dapr init || true

-dapr run --app-id test-app --dapr-grpc-port 4001 --components-path ./examples/components/
+dapr run --app-id test-app --dapr-grpc-port 4001 --resources-path ./examples/components/
```

To run the E2E tests on a specific python version (eg: 3.11), run the following command from the project root:
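
Stepping outside the diff for a moment: the backoff formula documented in the new README notes can be illustrated with a short, self-contained sketch (not part of this commit) that applies `first_retry_interval * (backoff_coefficient ^ retry_number)`, capped by `max_retry_interval`, to the example policy:

```python
from datetime import timedelta

# Illustrative only: compute the delays the README's example policy would produce.
first_retry_interval = timedelta(seconds=1)
backoff_coefficient = 2.0
max_retry_interval = timedelta(seconds=30)
max_number_of_attempts = 5  # 1 initial attempt + up to 4 retries

for retry_number in range(max_number_of_attempts - 1):
    delay = first_retry_interval.total_seconds() * (backoff_coefficient ** retry_number)
    delay = min(delay, max_retry_interval.total_seconds())
    print(f"retry #{retry_number + 1}: wait {delay:g}s")
# Prints: retry #1: 1s, #2: 2s, #3: 4s, #4: 8s (the 30s cap is never reached here).
```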

dev-requirements.txt

Lines changed: 0 additions & 1 deletion
@@ -1 +0,0 @@
-grpcio-tools==1.62.3 # 1.62.X is the latest version before protobuf 1.26.X is used which has breaking changes for Python # supports protobuf 6.x and aligns with generated code

durabletask/client.py

Lines changed: 72 additions & 4 deletions
@@ -127,9 +127,28 @@ def __init__(
            interceptors=interceptors,
            options=channel_options,
        )
+        self._channel = channel
        self._stub = stubs.TaskHubSidecarServiceStub(channel)
        self._logger = shared.get_logger("client", log_handler, log_formatter)

+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        try:
+            self.close()
+        finally:
+            return False
+
+    def close(self) -> None:
+        """Close the underlying gRPC channel."""
+        try:
+            # grpc.Channel.close() is idempotent
+            self._channel.close()
+        except Exception:
+            # Best-effort cleanup
+            pass
+
    def schedule_new_orchestration(
        self,
        orchestrator: Union[task.Orchestrator[TInput, TOutput], str],
@@ -188,10 +207,59 @@ def wait_for_orchestration_completion(
    ) -> Optional[OrchestrationState]:
        req = pb.GetInstanceRequest(instanceId=instance_id, getInputsAndOutputs=fetch_payloads)
        try:
-            grpc_timeout = None if timeout == 0 else timeout
-            self._logger.info(
-                f"Waiting {'indefinitely' if timeout == 0 else f'up to {timeout}s'} for instance '{instance_id}' to complete."
-            )
+            # gRPC timeout mapping (pytest unit tests may pass None explicitly)
+            grpc_timeout = None if (timeout is None or timeout == 0) else timeout
+
+            # If timeout is None or 0, skip pre-checks/polling and call server-side wait directly
+            if timeout is None or timeout == 0:
+                self._logger.info(
+                    f"Waiting {'indefinitely' if not timeout else f'up to {timeout}s'} for instance '{instance_id}' to complete."
+                )
+                res: pb.GetInstanceResponse = self._stub.WaitForInstanceCompletion(
+                    req, timeout=grpc_timeout
+                )
+                state = new_orchestration_state(req.instanceId, res)
+                return state
+
+            # For positive timeout, best-effort pre-check and short polling to avoid long server waits
+            try:
+                # First check if the orchestration is already completed
+                current_state = self.get_orchestration_state(
+                    instance_id, fetch_payloads=fetch_payloads
+                )
+                if current_state and current_state.runtime_status in [
+                    OrchestrationStatus.COMPLETED,
+                    OrchestrationStatus.FAILED,
+                    OrchestrationStatus.TERMINATED,
+                ]:
+                    return current_state
+
+                # Poll for completion with exponential backoff to handle eventual consistency
+                import time
+
+                poll_timeout = min(timeout, 10)
+                poll_start = time.time()
+                poll_interval = 0.1
+
+                while time.time() - poll_start < poll_timeout:
+                    current_state = self.get_orchestration_state(
+                        instance_id, fetch_payloads=fetch_payloads
+                    )
+
+                    if current_state and current_state.runtime_status in [
+                        OrchestrationStatus.COMPLETED,
+                        OrchestrationStatus.FAILED,
+                        OrchestrationStatus.TERMINATED,
+                    ]:
+                        return current_state
+
+                    time.sleep(poll_interval)
+                    poll_interval = min(poll_interval * 1.5, 1.0) # Exponential backoff, max 1s
+            except Exception:
+                # Ignore pre-check/poll issues (e.g., mocked stubs in unit tests) and fall back
+                pass
+
+            self._logger.info(f"Waiting up to {timeout}s for instance '{instance_id}' to complete.")
            res: pb.GetInstanceResponse = self._stub.WaitForInstanceCompletion(
                req, timeout=grpc_timeout
            )
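
The shutdown helpers and the reworked wait logic above can be exercised together. A hedged usage sketch follows (not part of the commit; `TaskHubGrpcClient`, `host_address`, and the orchestrator name are assumptions based on the existing client API):

```python
from durabletask import client

# The context-manager support added above closes the gRPC channel on exit (__exit__ -> close()).
with client.TaskHubGrpcClient(host_address="localhost:4001") as c:
    instance_id = c.schedule_new_orchestration("my_orchestrator", input={"value": 42})

    # timeout=None or timeout=0: skip the client-side pre-check/polling and wait server-side.
    # timeout > 0: best-effort pre-check and short polling (up to ~10s), then fall back to the
    # server-side wait bounded by the gRPC timeout.
    state = c.wait_for_orchestration_completion(instance_id, timeout=60)
    if state:
        print(state.runtime_status, state.serialized_output)
```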

durabletask/internal/shared.py

Lines changed: 1 addition & 1 deletion
@@ -102,7 +102,7 @@ def get_logger(
    # Add a default log handler if none is provided
    if log_handler is None:
        log_handler = logging.StreamHandler()
-        log_handler.setLevel(logging.INFO)
+        log_handler.setLevel(logging.DEBUG)
    logger.handlers.append(log_handler)

    # Set a default log formatter to our handler if none is provided
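
This raises the default console handler from INFO to DEBUG. A caller that wants quieter output can keep supplying its own handler; a small sketch (not part of the commit), assuming the client forwards a `log_handler` keyword through to `shared.get_logger` as its `__init__` shown earlier does:

```python
import logging

from durabletask import client

# Pass an explicit INFO-level handler so the DEBUG default created by
# get_logger is never installed.
handler = logging.StreamHandler()
handler.setLevel(logging.INFO)

c = client.TaskHubGrpcClient(host_address="localhost:4001", log_handler=handler)
```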

durabletask/task.py

Lines changed: 37 additions & 1 deletion
@@ -233,6 +233,16 @@ class OrchestrationStateError(Exception):
    pass


+class NonRetryableError(Exception):
+    """Exception indicating the operation should not be retried.
+
+    If an activity or sub-orchestration raises this exception, retry logic will be
+    bypassed and the failure will be returned immediately to the orchestrator.
+    """
+
+    pass
+
+
class Task(ABC, Generic[T]):
    """Abstract base class for asynchronous tasks in a durable orchestration."""

@@ -397,7 +407,7 @@ def compute_next_delay(self) -> Optional[timedelta]:
                next_delay_f = min(
                    next_delay_f, self._retry_policy.max_retry_interval.total_seconds()
                )
-                return timedelta(seconds=next_delay_f)
+            return timedelta(seconds=next_delay_f)

        return None

@@ -490,6 +500,7 @@ def __init__(
        backoff_coefficient: Optional[float] = 1.0,
        max_retry_interval: Optional[timedelta] = None,
        retry_timeout: Optional[timedelta] = None,
+        non_retryable_error_types: Optional[list[Union[str, type]]] = None,
    ):
        """Creates a new RetryPolicy instance.

@@ -505,6 +516,11 @@ def __init__(
            The maximum retry interval to use for any retry attempt.
        retry_timeout : Optional[timedelta]
            The maximum amount of time to spend retrying the operation.
+        non_retryable_error_types : Optional[list[Union[str, type]]]
+            A list of exception type names or classes that should not be retried.
+            If a failure's error type matches any of these, the task fails immediately.
+            The built-in NonRetryableError is always treated as non-retryable regardless
+            of this setting.
        """
        # validate inputs
        if first_retry_interval < timedelta(seconds=0):
@@ -523,6 +539,17 @@ def __init__(
        self._backoff_coefficient = backoff_coefficient
        self._max_retry_interval = max_retry_interval
        self._retry_timeout = retry_timeout
+        # Normalize non-retryable error type names to a set of strings
+        names: Optional[set[str]] = None
+        if non_retryable_error_types:
+            names = set()
+            for t in non_retryable_error_types:
+                if isinstance(t, str):
+                    if t:
+                        names.add(t)
+                elif isinstance(t, type):
+                    names.add(t.__name__)
+        self._non_retryable_error_types = names

    @property
    def first_retry_interval(self) -> timedelta:
@@ -549,6 +576,15 @@ def retry_timeout(self) -> Optional[timedelta]:
        """The maximum amount of time to spend retrying the operation."""
        return self._retry_timeout

+    @property
+    def non_retryable_error_types(self) -> Optional[set[str]]:
+        """Set of error type names that should not be retried.
+
+        Comparison is performed against the errorType string provided by the
+        backend (typically the exception class name).
+        """
+        return self._non_retryable_error_types
+

def get_name(fn: Callable) -> str:
    """Returns the name of the provided function"""
