diff --git a/.github/workflows/publish-state-mangaer.yml b/.github/workflows/publish-state-mangaer.yml
index 883fd9b8..b3926336 100644
--- a/.github/workflows/publish-state-mangaer.yml
+++ b/.github/workflows/publish-state-mangaer.yml
@@ -111,30 +111,3 @@ jobs:
           push: true
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
-
-  deploy-to-k8s:
-    needs: publish-image
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Deploy to K8s
-        run: |
-          echo "${{ secrets.KUBE_CONFIG }}" | base64 -d > kubeconfig.yaml
-          export KUBECONFIG=$PWD/kubeconfig.yaml
-          kubectl get nodes
-
-          echo "selected image: ${{ fromJson(needs.publish-image.outputs.json).tags[1] }}"
-
-          kubectl set image deployment/exosphere-state-manager exosphere-state-manager=${{fromJson(needs.publish-image.outputs.json).tags[1]}}
-
-          kubectl rollout status deployment/exosphere-state-manager
-
-          status=$(kubectl rollout status deployment/exosphere-state-manager)
-
-          echo "$status"
-
-          if [[ "$status" != *"successfully rolled out"* ]]; then
-            kubectl rollout undo deployment/exosphere-state-manager
-            echo "❌ Deployment failed. Rolled back." >&2
-            exit 1
-          fi
\ No newline at end of file
diff --git a/docs/docs/exosphere/create-graph.md b/docs/docs/exosphere/create-graph.md
index 11d20395..178debad 100644
--- a/docs/docs/exosphere/create-graph.md
+++ b/docs/docs/exosphere/create-graph.md
@@ -51,7 +51,13 @@ One can define a graph on Exosphere through a simple json config, which specifie
       },
       "next_nodes": []
     }
-  ]
+  ],
+  "retry_policy": {
+    "max_retries": 3,
+    "strategy": "EXPONENTIAL",
+    "backoff_factor": 2000,
+    "exponent": 2
+  }
 }
 ```
 
@@ -126,6 +132,24 @@ Use the `${{ ... }}` syntax to map outputs from previous nodes:
 - **`${{ node_identifier.outputs.field_name }}`**: Maps output from a specific node
 - **`initial`**: Static value provided when the graph is triggered
 - **Direct values**: String values. In v1, numbers/booleans must be string-encoded (e.g., "42", "true").
+
+### Retry Policy
+
+Graphs can include a retry policy to handle transient failures automatically. The retry policy is configured at the graph level and applies to all nodes within the graph.
+
+```json
+{
+  "retry_policy": {
+    "max_retries": 3,
+    "strategy": "EXPONENTIAL",
+    "backoff_factor": 2000, // milliseconds
+    "exponent": 2
+  }
+}
+```
+
+For detailed information about retry policies, including all available strategies and configuration options, see the [Retry Policy](retry-policy.md) documentation.
+
 ## Creating Graph Templates
 
 The recommended way to create graph templates is using the Exosphere Python SDK, which provides a clean interface to the State Manager API.
@@ -156,7 +180,13 @@ async def create_graph_template():
         result = await state_manager.upsert_graph(
             graph_name="my-workflow",
             graph_nodes=graph_nodes,
-            secrets=secrets
+            secrets=secrets,
+            retry_policy={
+                "max_retries": 3,
+                "strategy": "EXPONENTIAL",
+                "backoff_factor": 2000,
+                "exponent": 2
+            }
         )
         print("Graph template created successfully!")
         print(f"Validation status: {result['validation_status']}")
@@ -268,7 +298,13 @@ The state manager validates your graph template:
             result = await state_manager.upsert_graph(
                 graph_name="my-workflow",
                 graph_nodes=updated_nodes,
-                secrets=updated_secrets
+                secrets=updated_secrets,
+                retry_policy={
+                    "max_retries": 3,
+                    "strategy": "EXPONENTIAL",
+                    "backoff_factor": 2000,
+                    "exponent": 2
+                }
             )
             print("Graph template updated successfully!")
             print(f"Validation status: {result['validation_status']}")
diff --git a/docs/docs/exosphere/retry-policy.md b/docs/docs/exosphere/retry-policy.md
new file mode 100644
index 00000000..639306dc
--- /dev/null
+++ b/docs/docs/exosphere/retry-policy.md
@@ -0,0 +1,397 @@
+# Retry Policy
+
+!!! beta "Beta Feature"
+    Retry Policy is currently available in beta. The API and functionality may change in future releases.
+
+The Retry Policy feature in Exosphere provides sophisticated retry mechanisms for handling transient failures in your workflow nodes. When a node execution fails, the retry policy automatically determines when and how to retry the execution based on configurable strategies.
+
+## Overview
+
+Retry policies are configured at the graph level and apply to all nodes within that graph. When a node fails with an error, the state manager automatically creates a retry state with a calculated delay before the next execution attempt.
+
+## Configuration
+
+Retry policies are defined in your graph template configuration:
+
+```json
+{
+  "secrets": {
+    "api_key": "your-api-key"
+  },
+  "nodes": [
+    {
+      "node_name": "MyNode",
+      "namespace": "MyProject",
+      "identifier": "my_node",
+      "inputs": {
+        "data": "initial"
+      },
+      "next_nodes": []
+    }
+  ],
+  "retry_policy": {
+    "max_retries": 3,
+    "strategy": "EXPONENTIAL",
+    "backoff_factor": 2000,
+    "exponent": 2,
+    "max_delay": 3600000
+  }
+}
+```
+
+## Parameters
+
+### max_retries
+
+- **Type**: `int`
+- **Default**: `3`
+- **Description**: The maximum number of retry attempts before giving up
+- **Constraints**: Must be >= 0
+
+### strategy
+
+- **Type**: `string`
+- **Default**: `"EXPONENTIAL"`
+- **Description**: The retry strategy to use for calculating delays
+- **Options**: See [Retry Strategies](#retry-strategies) below
+
+### backoff_factor
+
+- **Type**: `int` (milliseconds)
+- **Default**: `2000` (2 seconds)
+- **Description**: The base delay factor in milliseconds
+- **Constraints**: Must be > 0
+
+### exponent
+
+- **Type**: `int`
+- **Default**: `2`
+- **Description**: The exponent used for exponential strategies
+- **Constraints**: Must be > 0
+
+### max_delay
+
+- **Type**: `int | null` (milliseconds)
+- **Default**: `null` (no maximum delay)
+- **Description**: The maximum delay in milliseconds that any retry attempt can have. When set, all calculated delays are capped at this value using the `_cap` function
+- **Constraints**: Must be > 0 when not null
+- **Example**: `3600000` (1 hour) would cap all delays to a maximum of 1 hour
+
+## Retry Strategies
+
+Exosphere supports three main categories of retry strategies, each with jitter variants to prevent thundering herd problems.
+
+### Exponential Strategies
+
+Exponential strategies increase the delay exponentially with each retry attempt.
+
+#### EXPONENTIAL
+
+Standard exponential backoff without jitter.
+
+**Formula**: `backoff_factor * (exponent ^ (retry_count - 1))`
+
+**Example**:
+
+- Retry 1: 2000ms (2 seconds)
+- Retry 2: 4000ms (4 seconds)
+- Retry 3: 8000ms (8 seconds)
+
+#### EXPONENTIAL_FULL_JITTER
+
+Exponential backoff with full jitter (random delay between 0 and calculated delay).
+
+**Formula**: `random(0, backoff_factor * (exponent ^ (retry_count - 1)))`
+
+*Note: `random(a, b)` denotes a uniform random draw over the inclusive range [a, b].*
+
+**Example**:
+
+- Retry 1: 0-2000ms (random)
+- Retry 2: 0-4000ms (random)
+- Retry 3: 0-8000ms (random)
+
+#### EXPONENTIAL_EQUAL_JITTER
+
+Exponential backoff with equal jitter (random delay around half the calculated delay).
+
+**Formula**: `(backoff_factor * (exponent ^ (retry_count - 1))) / 2 + random(0, (backoff_factor * (exponent ^ (retry_count - 1))) / 2)`
+
+*Note: `random(a, b)` denotes a uniform random draw over the inclusive range [a, b].*
+
+**Example**:
+
+- Retry 1: 1000-2000ms (random)
+- Retry 2: 2000-4000ms (random)
+- Retry 3: 4000-8000ms (random)
+
+### Linear Strategies
+
+Linear strategies increase the delay linearly with each retry attempt.
+
+#### LINEAR
+
+Standard linear backoff without jitter.
+
+**Formula**: `backoff_factor * retry_count`
+
+**Example**:
+
+- Retry 1: 2000ms (2 seconds)
+- Retry 2: 4000ms (4 seconds)
+- Retry 3: 6000ms (6 seconds)
+
+#### LINEAR_FULL_JITTER
+
+Linear backoff with full jitter.
+
+**Formula**: `random(0, backoff_factor * retry_count)`
+
+*Note: `random(a, b)` denotes a uniform random draw over the inclusive range [a, b].*
+
+**Example**:
+
+- Retry 1: 0-2000ms (random)
+- Retry 2: 0-4000ms (random)
+- Retry 3: 0-6000ms (random)
+
+#### LINEAR_EQUAL_JITTER
+
+Linear backoff with equal jitter.
+
+**Formula**: `(backoff_factor * retry_count) / 2 + random(0, (backoff_factor * retry_count) / 2)`
+
+*Note: `random(a, b)` denotes a uniform random draw over the inclusive range [a, b].*
+
+**Example**:
+
+- Retry 1: 1000-2000ms (random)
+- Retry 2: 2000-4000ms (random)
+- Retry 3: 3000-6000ms (random)
+
+### Fixed Strategies
+
+Fixed strategies use a constant delay for all retry attempts.
+
+#### FIXED
+
+Standard fixed delay without jitter.
+
+**Formula**: `backoff_factor`
+
+**Example**:
+
+- Retry 1: 2000ms (2 seconds)
+- Retry 2: 2000ms (2 seconds)
+- Retry 3: 2000ms (2 seconds)
+
+#### FIXED_FULL_JITTER
+
+Fixed delay with full jitter.
+
+**Formula**: `random(0, backoff_factor)`
+
+*Note: `random(a, b)` denotes a uniform random draw over the inclusive range [a, b].*
+
+**Example**:
+
+- Retry 1: 0-2000ms (random)
+- Retry 2: 0-2000ms (random)
+- Retry 3: 0-2000ms (random)
+
+#### FIXED_EQUAL_JITTER
+
+Fixed delay with equal jitter.
+
+**Formula**: `backoff_factor / 2 + random(0, backoff_factor / 2)`
+
+*Note: `random(a, b)` denotes a uniform random draw over the inclusive range [a, b].*
+
+**Example**:
+
+- Retry 1: 1000-2000ms (random)
+- Retry 2: 1000-2000ms (random)
+- Retry 3: 1000-2000ms (random)
+
+## Delay Capping
+
+The retry policy includes a built-in delay capping mechanism through the `_cap` function and `max_delay` parameter. This ensures that retry delays never exceed a specified maximum value, even with aggressive exponential backoff strategies.
+
+### How Delay Capping Works
+
+The `_cap` function is applied to all calculated delays:
+
+```python
+def _cap(value: int) -> int:
+    if self.max_delay is not None:
+        return min(value, self.max_delay)
+    return value
+```
+
+**Behavior:**
+
+- If `max_delay` is set, all calculated delays are capped at this value
+- If `max_delay` is `null` (default), no capping is applied
+- The capping is applied after all strategy calculations.
+
+### Example with Delay Capping
+
+Consider an exponential strategy with `backoff_factor: 2000`, `exponent: 2`, and `max_delay: 10000`:
+
+**With capping:**
+
+- Retry 1: 2000ms
+- Retry 2: 4000ms
+- Retry 3: 8000ms
+- Retry 4: 10000ms (capped at max_delay)
+
+### When to Use Delay Capping
+
+- **Long-running workflows**: Prevent excessive delays that could impact overall workflow completion time
+- **User-facing applications**: Ensure retries don't create unacceptable wait times
+- **Resource management**: Control resource consumption by limiting retry delays
+- **Predictable behavior**: Create more predictable retry patterns for monitoring and alerting
+
+## Usage Examples
+
+### Basic Exponential Retry
+
+```json
+{
+  "retry_policy": {
+    "max_retries": 3,
+    "strategy": "EXPONENTIAL",
+    "backoff_factor": 1000,
+    "exponent": 2
+  }
+}
+```
+
+### Aggressive Retry with Jitter
+
+```json
+{
+  "retry_policy": {
+    "max_retries": 5,
+    "strategy": "EXPONENTIAL_FULL_JITTER",
+    "backoff_factor": 500,
+    "exponent": 3
+  }
+}
+```
+
+### Conservative Linear Retry
+
+```json
+{
+  "retry_policy": {
+    "max_retries": 2,
+    "strategy": "LINEAR",
+    "backoff_factor": 5000
+  }
+}
+```
+
+### Fixed Retry for Rate Limiting
+
+```json
+{
+  "retry_policy": {
+    "max_retries": 10,
+    "strategy": "FIXED_EQUAL_JITTER",
+    "backoff_factor": 1000
+  }
+}
+```
+
+### Exponential Retry with Delay Capping
+
+```json
+{
+  "retry_policy": {
+    "max_retries": 5,
+    "strategy": "EXPONENTIAL",
+    "backoff_factor": 2000,
+    "exponent": 2,
+    "max_delay": 30000
+  }
+}
+```
+
+### Conservative Retry with Maximum Delay
+
+```json
+{
+  "retry_policy": {
+    "max_retries": 3,
+    "strategy": "EXPONENTIAL_FULL_JITTER",
+    "backoff_factor": 1000,
+    "exponent": 3,
+    "max_delay": 60000
+  }
+}
+```
+
+## When Retries Are Triggered
+
+Retries are automatically triggered when:
+
+1. A node execution fails with an error
+2. The current retry count is less than `max_retries`
+3. The state status is `QUEUED`
+
+The retry mechanism:
+
+- Creates a new state with `retry_count` incremented by 1
+- Sets `enqueue_after` to the current time plus the calculated delay
+- Sets the original state status to `ERRORED` with the error message
+
+## Best Practices
+
+### Choose the Right Strategy
+
+- **EXPONENTIAL**: Best for most transient failures (network issues, temporary service unavailability)
+- **LINEAR**: Good for predictable, consistent delays
+- **FIXED**: Useful for rate limiting scenarios
+
+### Use Jitter for High Concurrency
+
+- **FULL_JITTER**: Best for high concurrency to prevent thundering herd
+- **EQUAL_JITTER**: Good balance between predictability and randomization
+- **No Jitter**: Use only when you need deterministic behavior
+
+### Set Appropriate Limits
+
+- **max_retries**: Consider the nature of your failures and downstream dependencies
+- **backoff_factor**: Balance between responsiveness and resource usage
+- **exponent**: Higher values create more aggressive backoff
+- **max_delay**: Set a reasonable maximum delay to prevent excessive wait times, especially for exponential strategies
+
+### Monitor Retry Patterns
+
+- Track retry counts in your monitoring system
+- Set up alerts for graphs with high retry rates
+- Analyze retry patterns to identify systemic issues
+
+## Limitations
+
+- Retry policies apply to all nodes in a graph uniformly
+- Individual node-level retry policies are not supported
+- Retry delays are calculated in milliseconds
+- Maximum delay can be capped using the `max_delay` parameter (recommended for long-running workflows)
+
+## Error Handling
+
+If a retry policy configuration is invalid:
+
+- The graph template validation will fail
+- An error will be returned during graph creation
+- The graph will not be saved until the configuration is corrected
+
+## Integration with Signals
+
+Retry policies work alongside Exosphere's signaling system:
+
+- Nodes can still raise `PruneSignal` to stop retries immediately
+- Nodes can raise `ReQueueAfterSignal` to re-queue after some time. This will not mark nodes as failures.
+- When a node is re-queued using `ReQueueAfterSignal`, the `retry_count` is not incremented. The existing count is carried over to the new state.
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index 51df18d8..5ebcf23f 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -101,6 +101,7 @@ plugins:
           - exosphere/register-node.md
           - exosphere/create-runtime.md
           - exosphere/create-graph.md
+          - exosphere/retry-policy.md
           - exosphere/trigger-graph.md
           - exosphere/dashboard.md
           - exosphere/signals.md
@@ -129,6 +130,7 @@ nav:
   - Register Node: exosphere/register-node.md 
   - Create Runtime: exosphere/create-runtime.md
   - Create Graph: exosphere/create-graph.md
+  - Retry Policy: exosphere/retry-policy.md
   - Trigger Graph: exosphere/trigger-graph.md  
   - Dashboard: exosphere/dashboard.md
   - Signals: exosphere/signals.md
diff --git a/state-manager/app/controller/errored_state.py b/state-manager/app/controller/errored_state.py
index b59e2573..f798cec8 100644
--- a/state-manager/app/controller/errored_state.py
+++ b/state-manager/app/controller/errored_state.py
@@ -1,10 +1,14 @@
+import time
+
 from app.models.errored_models import ErroredRequestModel, ErroredResponseModel
 from fastapi import HTTPException, status
 from beanie import PydanticObjectId
+from pymongo.errors import DuplicateKeyError
 
 from app.models.db.state import State
 from app.models.state_status_enum import StateStatusEnum
 from app.singletons.logs_manager import LogsManager
+from app.models.db.graph_template_model import GraphTemplate
 
 logger = LogsManager().get_logger()
 
@@ -23,11 +27,45 @@ async def errored_state(namespace_name: str, state_id: PydanticObjectId, body: E
         if state.status == StateStatusEnum.EXECUTED:
             raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="State is already executed")
         
+        try:
+            graph_template = await GraphTemplate.get(namespace_name, state.graph_name)
+        except Exception as e:
+            logger.error(f"Error getting graph template {state.graph_name} for namespace {namespace_name}", x_exosphere_request_id=x_exosphere_request_id, error=e)
+            if isinstance(e, ValueError) and "Graph template not found" in str(e):
+                raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Graph template not found")
+            raise e
+
+        retry_created = False
+
+        if state.retry_count < graph_template.retry_policy.max_retries:
+            try:
+                retry_state = State(
+                    node_name=state.node_name,
+                    namespace_name=state.namespace_name,
+                    identifier=state.identifier,
+                    graph_name=state.graph_name,
+                    run_id=state.run_id,
+                    status=StateStatusEnum.CREATED,
+                    inputs=state.inputs,
+                    outputs={},
+                    error=None,
+                    parents=state.parents,
+                    does_unites=state.does_unites,
+                    enqueue_after= int(time.time() * 1000) + graph_template.retry_policy.compute_delay(state.retry_count + 1),
+                    retry_count=state.retry_count + 1,
+                    fanout_id=state.fanout_id
+                )
+                retry_state = await retry_state.insert()
+                logger.info(f"Retry state {retry_state.id} created for state {state_id}", x_exosphere_request_id=x_exosphere_request_id)
+                retry_created = True
+            except DuplicateKeyError:
+                logger.info(f"Duplicate retry state detected for state {state_id}. A retry state with the same unique key already exists.", x_exosphere_request_id=x_exosphere_request_id)
+
         state.status = StateStatusEnum.ERRORED
         state.error = body.error
         await state.save()
 
-        return ErroredResponseModel(status=StateStatusEnum.ERRORED)
+        return ErroredResponseModel(status=StateStatusEnum.ERRORED, retry_created=retry_created)
 
     except Exception as e:
         logger.error(f"Error errored state {state_id} for namespace {namespace_name}", x_exosphere_request_id=x_exosphere_request_id, error=e)
diff --git a/state-manager/app/controller/upsert_graph_template.py b/state-manager/app/controller/upsert_graph_template.py
index 99a178ae..16882018 100644
--- a/state-manager/app/controller/upsert_graph_template.py
+++ b/state-manager/app/controller/upsert_graph_template.py
@@ -27,7 +27,8 @@ async def upsert_graph_template(namespace_name: str, graph_name: str, body: Upse
                     Set({
                         GraphTemplate.nodes: body.nodes, # type: ignore
                         GraphTemplate.validation_status: GraphTemplateValidationStatus.PENDING, # type: ignore
-                        GraphTemplate.validation_errors: [] # type: ignore
+                        GraphTemplate.validation_errors: [], # type: ignore
+                        GraphTemplate.retry_policy: body.retry_policy # type: ignore
                     })
                 )
                 
@@ -44,7 +45,8 @@ async def upsert_graph_template(namespace_name: str, graph_name: str, body: Upse
                         namespace=namespace_name,
                         nodes=body.nodes,
                         validation_status=GraphTemplateValidationStatus.PENDING,
-                        validation_errors=[]
+                        validation_errors=[],
+                        retry_policy=body.retry_policy
                     ).set_secrets(body.secrets)
                 )
         except ValueError as e:
@@ -58,6 +60,7 @@ async def upsert_graph_template(namespace_name: str, graph_name: str, body: Upse
             validation_status=graph_template.validation_status,
             validation_errors=graph_template.validation_errors,
             secrets={secret_name: True for secret_name in graph_template.get_secrets().keys()},
+            retry_policy=graph_template.retry_policy,
             created_at=graph_template.created_at,
             updated_at=graph_template.updated_at
         )
diff --git a/state-manager/app/models/db/graph_template_model.py b/state-manager/app/models/db/graph_template_model.py
index 999d5389..4b3a4731 100644
--- a/state-manager/app/models/db/graph_template_model.py
+++ b/state-manager/app/models/db/graph_template_model.py
@@ -11,7 +11,7 @@
 from ..node_template_model import NodeTemplate
 from app.utils.encrypter import get_encrypter
 from app.models.dependent_string import DependentString
-
+from app.models.retry_policy_model import RetryPolicyModel
 
 class GraphTemplate(BaseDatabaseModel):
     name: str = Field(..., description="Name of the graph")
@@ -20,6 +20,7 @@ class GraphTemplate(BaseDatabaseModel):
     validation_status: GraphTemplateValidationStatus = Field(..., description="Validation status of the graph")
     validation_errors: List[str] = Field(default_factory=list, description="Validation errors of the graph")
     secrets: Dict[str, str] = Field(default_factory=dict, description="Secrets of the graph")
+    retry_policy: RetryPolicyModel = Field(default_factory=RetryPolicyModel, description="Retry policy of the graph")
 
     _node_by_identifier: Dict[str, NodeTemplate] | None = PrivateAttr(default=None)
     _parents_by_identifier: Dict[str, set[str]] | None = PrivateAttr(default=None) # type: ignore
diff --git a/state-manager/app/models/db/state.py b/state-manager/app/models/db/state.py
index 6ed350d2..05441ec3 100644
--- a/state-manager/app/models/db/state.py
+++ b/state-manager/app/models/db/state.py
@@ -8,6 +8,7 @@
 import hashlib
 import json
 import time
+import uuid
 
 class State(BaseDatabaseModel):
     node_name: str = Field(..., description="Name of the node of the state")
@@ -24,7 +25,9 @@ class State(BaseDatabaseModel):
     does_unites: bool = Field(default=False, description="Whether this state unites other states")
     state_fingerprint: str = Field(default="", description="Fingerprint of the state")
     enqueue_after: int = Field(default_factory=lambda: int(time.time() * 1000), gt=0, description="Unix time in milliseconds after which the state should be enqueued")
-    
+    retry_count: int = Field(default=0, description="Number of times the state has been retried")
+    fanout_id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Fanout ID of the state")
+
     @before_event([Insert, Replace, Save])
     def _generate_fingerprint(self):
         if not self.does_unites:
@@ -37,6 +40,7 @@ def _generate_fingerprint(self):
             "identifier": self.identifier,
             "graph_name": self.graph_name,
             "run_id": self.run_id,
+            "retry_count": self.retry_count,
             "parents": {k: str(v) for k, v in self.parents.items()},
         }
         payload = json.dumps(
@@ -76,5 +80,18 @@ class Settings:
                     ("node_name", 1),
                 ],
                 name="enqueue_query"
+            ),
+            IndexModel(
+                [
+                    ("node_name", 1),
+                    ("namespace_name", 1),
+                    ("graph_name", 1),
+                    ("identifier", 1),
+                    ("run_id", 1),
+                    ("retry_count", 1),
+                    ("fanout_id", 1),
+                ],
+                unique=True,
+                name="uniq_fanout_retry"
             )
         ]
\ No newline at end of file
diff --git a/state-manager/app/models/errored_models.py b/state-manager/app/models/errored_models.py
index 5acfaa34..8814d56a 100644
--- a/state-manager/app/models/errored_models.py
+++ b/state-manager/app/models/errored_models.py
@@ -7,4 +7,5 @@ class ErroredRequestModel(BaseModel):
 
 
 class ErroredResponseModel(BaseModel):
-    status: StateStatusEnum = Field(..., description="Status of the state")
\ No newline at end of file
+    status: StateStatusEnum = Field(..., description="Status of the state")
+    retry_created: bool = Field(default=False, description="Whether a retry state was created")
\ No newline at end of file
diff --git a/state-manager/app/models/graph_models.py b/state-manager/app/models/graph_models.py
index 1fd80bd1..8e67cb2d 100644
--- a/state-manager/app/models/graph_models.py
+++ b/state-manager/app/models/graph_models.py
@@ -3,16 +3,19 @@
 from typing import Dict, List, Optional
 from datetime import datetime
 from .graph_template_validation_status import GraphTemplateValidationStatus
+from .retry_policy_model import RetryPolicyModel
 
 
 class UpsertGraphTemplateRequest(BaseModel):
     secrets: Dict[str, str] = Field(..., description="Dictionary of secrets that are used while graph execution")
     nodes: List[NodeTemplate] = Field(..., description="List of node templates that define the graph structure")
+    retry_policy: RetryPolicyModel = Field(default_factory=RetryPolicyModel, description="Retry policy of the graph")
 
 
 class UpsertGraphTemplateResponse(BaseModel):
     nodes: List[NodeTemplate] = Field(..., description="List of node templates that define the graph structure")
     secrets: Dict[str, bool] = Field(..., description="Dictionary of secrets that are used while graph execution")
+    retry_policy: RetryPolicyModel = Field(default_factory=RetryPolicyModel, description="Retry policy of the graph")
     created_at: datetime = Field(..., description="Timestamp when the graph template was created")
     updated_at: datetime = Field(..., description="Timestamp when the graph template was last updated")
     validation_status: GraphTemplateValidationStatus = Field(..., description="Current validation status of the graph template")
diff --git a/state-manager/app/models/retry_policy_model.py b/state-manager/app/models/retry_policy_model.py
new file mode 100644
index 00000000..be719176
--- /dev/null
+++ b/state-manager/app/models/retry_policy_model.py
@@ -0,0 +1,69 @@
+from pydantic import BaseModel, Field
+from enum import Enum
+import random
+
+class RetryStrategy(str, Enum):
+    EXPONENTIAL = "EXPONENTIAL"
+    EXPONENTIAL_FULL_JITTER = "EXPONENTIAL_FULL_JITTER"
+    EXPONENTIAL_EQUAL_JITTER = "EXPONENTIAL_EQUAL_JITTER"
+
+    LINEAR = "LINEAR"
+    LINEAR_FULL_JITTER = "LINEAR_FULL_JITTER"
+    LINEAR_EQUAL_JITTER = "LINEAR_EQUAL_JITTER"
+
+    FIXED = "FIXED"
+    FIXED_FULL_JITTER = "FIXED_FULL_JITTER"
+    FIXED_EQUAL_JITTER = "FIXED_EQUAL_JITTER"
+
+class RetryPolicyModel(BaseModel):
+    max_retries: int = Field(default=3, description="The maximum number of retries", ge=0)
+    strategy: RetryStrategy = Field(default=RetryStrategy.EXPONENTIAL, description="The method of retry")
+    backoff_factor: int = Field(default=2000, description="The backoff factor in milliseconds (default: 2000 = 2 seconds)", gt=0)
+    exponent: int = Field(default=2, description="The exponent for the exponential retry strategy", gt=0)
+    max_delay: int | None = Field(default=None, description="The maximum delay in milliseconds (no default limit when None)", gt=0)
+
+    def compute_delay(self, retry_count: int) -> int:
+
+        def _cap(value: int) -> int:
+            if self.max_delay is not None:
+                return min(value, self.max_delay)
+            return value
+
+        if retry_count < 1:
+            raise ValueError(f"Retry count must be greater than or equal to 1, got {retry_count}")
+        
+        if self.strategy == RetryStrategy.EXPONENTIAL:
+            return _cap(self.backoff_factor * (self.exponent ** (retry_count - 1)))
+        
+        elif self.strategy == RetryStrategy.EXPONENTIAL_FULL_JITTER:
+            base = self.backoff_factor * (self.exponent ** (retry_count - 1))
+            return _cap(int(random.uniform(0, base)))
+        
+        elif self.strategy == RetryStrategy.EXPONENTIAL_EQUAL_JITTER:
+            base = self.backoff_factor * (self.exponent ** (retry_count - 1))
+            return _cap(int(base/2 + random.uniform(0, base / 2)))
+        
+        elif self.strategy == RetryStrategy.LINEAR:
+            return _cap(self.backoff_factor * retry_count)
+        
+        elif self.strategy == RetryStrategy.LINEAR_FULL_JITTER:
+            base = self.backoff_factor * retry_count
+            return _cap(int(random.uniform(0, base)))
+
+        elif self.strategy == RetryStrategy.LINEAR_EQUAL_JITTER:
+            base = self.backoff_factor * retry_count
+            return _cap(int(base/2 + random.uniform(0, base / 2)))
+
+        elif self.strategy == RetryStrategy.FIXED:
+            return _cap(self.backoff_factor)
+        
+        elif self.strategy == RetryStrategy.FIXED_FULL_JITTER:
+            base = self.backoff_factor
+            return _cap(int(random.uniform(0, base)))
+
+        elif self.strategy == RetryStrategy.FIXED_EQUAL_JITTER:
+            base = self.backoff_factor
+            return _cap(int(base/2 + random.uniform(0, base / 2)))
+
+        else:
+            raise ValueError(f"Invalid retry strategy: {self.strategy}")
\ No newline at end of file
diff --git a/state-manager/tests/unit/controller/test_errored_state.py b/state-manager/tests/unit/controller/test_errored_state.py
index b1fc7df5..032571bf 100644
--- a/state-manager/tests/unit/controller/test_errored_state.py
+++ b/state-manager/tests/unit/controller/test_errored_state.py
@@ -2,6 +2,7 @@
 from unittest.mock import AsyncMock, MagicMock, patch
 from fastapi import HTTPException, status
 from beanie import PydanticObjectId
+from pymongo.errors import DuplicateKeyError
 
 from app.controller.errored_state import errored_state
 from app.models.errored_models import ErroredRequestModel
@@ -34,6 +35,16 @@ def mock_state_queued(self):
         state = MagicMock()
         state.id = PydanticObjectId()
         state.status = StateStatusEnum.QUEUED
+        state.graph_name = "test_graph"
+        state.retry_count = 0
+        state.node_name = "test_node"
+        state.namespace_name = "test_namespace"
+        state.identifier = "test_identifier"
+        state.run_id = "test_run_id"
+        state.inputs = {}
+        state.parents = []
+        state.does_unites = False
+        state.fanout_id = None
         return state
 
     @pytest.fixture
@@ -41,11 +52,23 @@ def mock_state_executed(self):
         state = MagicMock()
         state.id = PydanticObjectId()
         state.status = StateStatusEnum.EXECUTED
+        state.graph_name = "test_graph"
+        state.retry_count = 0
+        state.node_name = "test_node"
+        state.namespace_name = "test_namespace"
+        state.identifier = "test_identifier"
+        state.run_id = "test_run_id"
+        state.inputs = {}
+        state.parents = []
+        state.does_unites = False
+        state.fanout_id = None
         return state
 
     @patch('app.controller.errored_state.State')
+    @patch('app.controller.errored_state.GraphTemplate')
     async def test_errored_state_success_queued(
         self,
+        mock_graph_template_class,
         mock_state_class,
         mock_namespace,
         mock_state_id,
@@ -55,10 +78,18 @@ async def test_errored_state_success_queued(
     ):
         """Test successful error marking of queued state"""      
         
-        mock_state_queued.save = AsyncMock()     
+        # Mock GraphTemplate.get to return a valid graph template
+        mock_graph_template = MagicMock()
+        mock_graph_template.retry_policy.max_retries = 3
+        mock_graph_template.retry_policy.compute_delay = MagicMock(return_value=1000)
+        mock_graph_template_class.get = AsyncMock(return_value=mock_graph_template)
         
-        mock_state_queued.status = StateStatusEnum.QUEUED
-        mock_state_queued.save = AsyncMock()
+        # Mock State constructor and insert method
+        mock_retry_state = MagicMock()
+        mock_retry_state.insert = AsyncMock(return_value=mock_retry_state)
+        mock_state_class.return_value = mock_retry_state
+        
+        mock_state_queued.save = AsyncMock()     
         mock_state_class.find_one = AsyncMock(return_value=mock_state_queued)
 
         # Act
@@ -75,8 +106,10 @@ async def test_errored_state_success_queued(
         
 
     @patch('app.controller.errored_state.State')
+    @patch('app.controller.errored_state.GraphTemplate')
     async def test_errored_state_success_executed(
         self,
+        mock_graph_template_class,
         mock_state_class,
         mock_namespace,
         mock_state_id,
@@ -84,26 +117,22 @@ async def test_errored_state_success_executed(
         mock_state_executed,
         mock_request_id
     ):
-        """Test successful error marking of executed state"""
-      
-        mock_state_executed.save = AsyncMock() 
-
-        mock_state_executed.status = StateStatusEnum.QUEUED
-        mock_state_executed.save = AsyncMock()
+        """Test that executed states cannot be marked as errored"""
+        # Arrange
+        mock_state_executed.status = StateStatusEnum.EXECUTED
         mock_state_class.find_one = AsyncMock(return_value=mock_state_executed)
 
-        # Act
-        result = await errored_state(
-            mock_namespace,
-            mock_state_id,
-            mock_errored_request,
-            mock_request_id
-        )
-
-        # Assert
-        assert result.status == StateStatusEnum.ERRORED
-        assert mock_state_class.find_one.call_count == 1  # Called once for finding
+        # Act & Assert
+        with pytest.raises(HTTPException) as exc_info:
+            await errored_state(
+                mock_namespace,
+                mock_state_id,
+                mock_errored_request,
+                mock_request_id
+            )
         
+        assert exc_info.value.status_code == status.HTTP_400_BAD_REQUEST
+        assert exc_info.value.detail == "State is already executed"
 
     @patch('app.controller.errored_state.State')
     async def test_errored_state_not_found(
@@ -197,6 +226,7 @@ async def test_errored_state_already_executed(
         # Arrange
         mock_state = MagicMock()
         mock_state.status = StateStatusEnum.EXECUTED
+        mock_state.graph_name = "test_graph"
         mock_state_class.find_one = AsyncMock(return_value=mock_state)
 
         # Act & Assert
@@ -222,7 +252,7 @@ async def test_errored_state_database_error(
     ):
         """Test handling of database errors"""
         # Arrange
-        mock_state_class.find_one = MagicMock(side_effect=Exception("Database error"))
+        mock_state_class.find_one = AsyncMock(side_effect=Exception("Database error"))
 
         # Act & Assert
         with pytest.raises(Exception) as exc_info:
@@ -236,31 +266,42 @@ async def test_errored_state_database_error(
         assert str(exc_info.value) == "Database error"
 
     @patch('app.controller.errored_state.State')
+    @patch('app.controller.errored_state.GraphTemplate')
     async def test_errored_state_with_different_error_message(
         self,
+        mock_graph_template_class,
         mock_state_class,
         mock_namespace,
         mock_state_id,
+        mock_errored_request,
         mock_state_queued,
         mock_request_id
     ):
         """Test error marking with different error message"""
         # Arrange
-        errored_request = ErroredRequestModel(
+        different_error_request = ErroredRequestModel(
             error="Different error message"
-        )       
+        )
+        
+        # Mock GraphTemplate.get to return a valid graph template
+        mock_graph_template = MagicMock()
+        mock_graph_template.retry_policy.max_retries = 3
+        mock_graph_template.retry_policy.compute_delay = MagicMock(return_value=1000)
+        mock_graph_template_class.get = AsyncMock(return_value=mock_graph_template)
+        
+        # Mock State constructor and insert method
+        mock_retry_state = MagicMock()
+        mock_retry_state.insert = AsyncMock(return_value=mock_retry_state)
+        mock_state_class.return_value = mock_retry_state
         
         mock_state_queued.save = AsyncMock()
-
-        mock_state_queued.status = StateStatusEnum.QUEUED
-        mock_state_queued.set = AsyncMock()
         mock_state_class.find_one = AsyncMock(return_value=mock_state_queued)
 
         # Act
         result = await errored_state(
             mock_namespace,
             mock_state_id,
-            errored_request,
+            different_error_request,
             mock_request_id
         )
 
@@ -269,3 +310,183 @@ async def test_errored_state_with_different_error_message(
         assert mock_state_class.find_one.call_count == 1  # Called once for finding
         assert mock_state_queued.error == "Different error message"
 
+    @patch('app.controller.errored_state.State')
+    @patch('app.controller.errored_state.GraphTemplate')
+    async def test_errored_state_graph_template_not_found(
+        self,
+        mock_graph_template_class,
+        mock_state_class,
+        mock_namespace,
+        mock_state_id,
+        mock_errored_request,
+        mock_state_queued,
+        mock_request_id
+    ):
+        """Test when graph template is not found"""
+        # Arrange
+        mock_state_queued.save = AsyncMock()
+        mock_state_class.find_one = AsyncMock(return_value=mock_state_queued)
+        
+        # Mock GraphTemplate.get to raise ValueError with "Graph template not found"
+        mock_graph_template_class.get = AsyncMock(side_effect=ValueError("Graph template not found"))
+
+        # Act & Assert
+        with pytest.raises(HTTPException) as exc_info:
+            await errored_state(
+                mock_namespace,
+                mock_state_id,
+                mock_errored_request,
+                mock_request_id
+            )
+        
+        assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND
+        assert exc_info.value.detail == "Graph template not found"
+
+    @patch('app.controller.errored_state.State')
+    @patch('app.controller.errored_state.GraphTemplate')
+    async def test_errored_state_graph_template_other_error(
+        self,
+        mock_graph_template_class,
+        mock_state_class,
+        mock_namespace,
+        mock_state_id,
+        mock_errored_request,
+        mock_state_queued,
+        mock_request_id
+    ):
+        """Test when graph template raises other exceptions"""
+        # Arrange
+        mock_state_queued.save = AsyncMock()
+        mock_state_class.find_one = AsyncMock(return_value=mock_state_queued)
+        
+        # Mock GraphTemplate.get to raise a different exception
+        mock_graph_template_class.get = AsyncMock(side_effect=Exception("Database connection error"))
+
+        # Act & Assert
+        with pytest.raises(Exception) as exc_info:
+            await errored_state(
+                mock_namespace,
+                mock_state_id,
+                mock_errored_request,
+                mock_request_id
+            )
+        
+        assert str(exc_info.value) == "Database connection error"
+
+    @patch('app.controller.errored_state.State')
+    @patch('app.controller.errored_state.GraphTemplate')
+    async def test_errored_state_duplicate_key_error(
+        self,
+        mock_graph_template_class,
+        mock_state_class,
+        mock_namespace,
+        mock_state_id,
+        mock_errored_request,
+        mock_state_queued,
+        mock_request_id
+    ):
+        """Test when creating retry state encounters DuplicateKeyError"""
+        # Arrange
+        mock_state_queued.save = AsyncMock()
+        mock_state_class.find_one = AsyncMock(return_value=mock_state_queued)
+        
+        # Mock GraphTemplate.get to return a valid graph template
+        mock_graph_template = MagicMock()
+        mock_graph_template.retry_policy.max_retries = 3
+        mock_graph_template.retry_policy.compute_delay = MagicMock(return_value=1000)
+        mock_graph_template_class.get = AsyncMock(return_value=mock_graph_template)
+        
+        # Mock State constructor and insert method to raise DuplicateKeyError
+        mock_retry_state = MagicMock()
+        mock_retry_state.insert = AsyncMock(side_effect=DuplicateKeyError("Duplicate key error"))
+        mock_state_class.return_value = mock_retry_state
+
+        # Act
+        result = await errored_state(
+            mock_namespace,
+            mock_state_id,
+            mock_errored_request,
+            mock_request_id
+        )
+
+        # Assert
+        assert result.status == StateStatusEnum.ERRORED
+        assert not result.retry_created
+        assert mock_state_queued.status == StateStatusEnum.ERRORED
+        assert mock_state_queued.error == mock_errored_request.error
+
+    @patch('app.controller.errored_state.State')
+    @patch('app.controller.errored_state.GraphTemplate')
+    async def test_errored_state_max_retries_reached(
+        self,
+        mock_graph_template_class,
+        mock_state_class,
+        mock_namespace,
+        mock_state_id,
+        mock_errored_request,
+        mock_request_id
+    ):
+        """Test when state has reached max retries"""
+        # Arrange
+        mock_state = MagicMock()
+        mock_state.id = PydanticObjectId()
+        mock_state.status = StateStatusEnum.QUEUED
+        mock_state.graph_name = "test_graph"
+        mock_state.retry_count = 3  # Already at max retries
+        mock_state.node_name = "test_node"
+        mock_state.namespace_name = "test_namespace"
+        mock_state.identifier = "test_identifier"
+        mock_state.run_id = "test_run_id"
+        mock_state.inputs = {}
+        mock_state.parents = []
+        mock_state.does_unites = False
+        mock_state.fanout_id = None
+        mock_state.save = AsyncMock()
+        
+        mock_state_class.find_one = AsyncMock(return_value=mock_state)
+        
+        # Mock GraphTemplate.get to return a valid graph template with max_retries = 3
+        mock_graph_template = MagicMock()
+        mock_graph_template.retry_policy.max_retries = 3
+        mock_graph_template_class.get = AsyncMock(return_value=mock_graph_template)
+
+        # Act
+        result = await errored_state(
+            mock_namespace,
+            mock_state_id,
+            mock_errored_request,
+            mock_request_id
+        )
+
+        # Assert
+        assert result.status == StateStatusEnum.ERRORED
+        assert not result.retry_created
+        assert mock_state.status == StateStatusEnum.ERRORED
+        assert mock_state.error == mock_errored_request.error
+        # Verify that State constructor was not called (no retry created)
+        mock_state_class.assert_not_called()
+
+    @patch('app.controller.errored_state.State')
+    async def test_errored_state_general_exception(
+        self,
+        mock_state_class,
+        mock_namespace,
+        mock_state_id,
+        mock_errored_request,
+        mock_request_id
+    ):
+        """Test handling of general exceptions in the main try-catch block"""
+        # Arrange
+        mock_state_class.find_one = AsyncMock(side_effect=Exception("Unexpected error"))
+
+        # Act & Assert
+        with pytest.raises(Exception) as exc_info:
+            await errored_state(
+                mock_namespace,
+                mock_state_id,
+                mock_errored_request,
+                mock_request_id
+            )
+        
+        assert str(exc_info.value) == "Unexpected error"
+
diff --git a/state-manager/tests/unit/controller/test_upsert_graph_template.py b/state-manager/tests/unit/controller/test_upsert_graph_template.py
index 2d309376..00832b7f 100644
--- a/state-manager/tests/unit/controller/test_upsert_graph_template.py
+++ b/state-manager/tests/unit/controller/test_upsert_graph_template.py
@@ -7,6 +7,7 @@
 from app.models.graph_models import UpsertGraphTemplateRequest
 from app.models.graph_template_validation_status import GraphTemplateValidationStatus
 from app.models.node_template_model import NodeTemplate
+from app.models.retry_policy_model import RetryPolicyModel
 
 
 class TestUpsertGraphTemplate:
@@ -74,6 +75,14 @@ def mock_existing_template(self, mock_nodes, mock_secrets):
         template.updated_at = datetime(2023, 1, 2, 12, 0, 0)
         template.get_secrets.return_value = mock_secrets
         template.set_secrets.return_value = template
+        
+        # Add proper retry_policy using real RetryPolicyModel
+        template.retry_policy = RetryPolicyModel(
+            max_retries=3,
+            backoff_factor=1000,
+            max_delay=30000
+        )
+        
         return template
 
     @patch('app.controller.upsert_graph_template.GraphTemplate')
@@ -148,6 +157,14 @@ async def test_upsert_graph_template_create_new(
         mock_new_template.get_secrets.return_value = mock_upsert_request.secrets
         mock_new_template.set_secrets.return_value = mock_new_template
         
+        # Add proper retry_policy mock
+        mock_retry_policy = RetryPolicyModel(
+            max_retries=3,
+            backoff_factor=1000,
+            max_delay=30000
+        )
+        mock_new_template.retry_policy = mock_retry_policy
+        
         mock_graph_template_class.insert = AsyncMock(return_value=mock_new_template)
 
         # Act
@@ -225,6 +242,14 @@ async def test_upsert_graph_template_with_empty_nodes(
         mock_existing_template.get_secrets.return_value = {}
         mock_existing_template.set_secrets.return_value = mock_existing_template
         
+        # Add proper retry_policy mock
+        mock_retry_policy = RetryPolicyModel(
+            max_retries=3,
+            backoff_factor=1000,
+            max_delay=30000
+        )
+        mock_existing_template.retry_policy = mock_retry_policy
+        
         mock_existing_template.update = AsyncMock()
 
         mock_graph_template_class.find_one = AsyncMock(return_value=mock_existing_template)
@@ -269,6 +294,14 @@ async def test_upsert_graph_template_with_validation_errors(
         mock_existing_template.get_secrets.return_value = mock_upsert_request.secrets
         mock_existing_template.set_secrets.return_value = mock_existing_template
 
+        # Add proper retry_policy mock
+        mock_retry_policy = RetryPolicyModel(
+            max_retries=3,
+            backoff_factor=1000,
+            max_delay=30000
+        )
+        mock_existing_template.retry_policy = mock_retry_policy
+
         mock_existing_template.update = AsyncMock()
         
         mock_graph_template_class.find_one = AsyncMock(return_value=mock_existing_template)
diff --git a/state-manager/tests/unit/models/test_retry_policy_model.py b/state-manager/tests/unit/models/test_retry_policy_model.py
new file mode 100644
index 00000000..038f3adc
--- /dev/null
+++ b/state-manager/tests/unit/models/test_retry_policy_model.py
@@ -0,0 +1,377 @@
+import pytest
+from app.models.retry_policy_model import RetryPolicyModel, RetryStrategy
+
+
+class TestRetryPolicyModel:
+    """Test cases for RetryPolicyModel"""
+
+    def test_default_initialization(self):
+        """Test RetryPolicyModel with default values"""
+        policy = RetryPolicyModel()
+        
+        assert policy.max_retries == 3
+        assert policy.strategy == RetryStrategy.EXPONENTIAL
+        assert policy.backoff_factor == 2000
+        assert policy.exponent == 2
+        assert policy.max_delay is None
+
+    def test_custom_initialization(self):
+        """Test RetryPolicyModel with custom values"""
+        policy = RetryPolicyModel(
+            max_retries=5,
+            strategy=RetryStrategy.LINEAR,
+            backoff_factor=1000,
+            exponent=3,
+            max_delay=10000
+        )
+        
+        assert policy.max_retries == 5
+        assert policy.strategy == RetryStrategy.LINEAR
+        assert policy.backoff_factor == 1000
+        assert policy.exponent == 3
+        assert policy.max_delay == 10000
+
+    def test_exponential_strategy(self):
+        """Test exponential retry strategy"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.EXPONENTIAL,
+            backoff_factor=1000,
+            exponent=2
+        )
+        
+        # Test retry count 1
+        delay = policy.compute_delay(1)
+        assert delay == 1000  # 1000 * 2^0
+        
+        # Test retry count 2
+        delay = policy.compute_delay(2)
+        assert delay == 2000  # 1000 * 2^1
+        
+        # Test retry count 3
+        delay = policy.compute_delay(3)
+        assert delay == 4000  # 1000 * 2^2
+
+    def test_exponential_strategy_with_max_delay(self):
+        """Test exponential retry strategy with max delay cap"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.EXPONENTIAL,
+            backoff_factor=1000,
+            exponent=2,
+            max_delay=3000
+        )
+        
+        # Test retry count 1
+        delay = policy.compute_delay(1)
+        assert delay == 1000  # 1000 * 2^0
+        
+        # Test retry count 2
+        delay = policy.compute_delay(2)
+        assert delay == 2000  # 1000 * 2^1
+        
+        # Test retry count 3 (should be capped at max_delay)
+        delay = policy.compute_delay(3)
+        assert delay == 3000  # Capped at max_delay
+
+    def test_linear_strategy(self):
+        """Test linear retry strategy"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.LINEAR,
+            backoff_factor=1000
+        )
+        
+        # Test retry count 1
+        delay = policy.compute_delay(1)
+        assert delay == 1000  # 1000 * 1
+        
+        # Test retry count 2
+        delay = policy.compute_delay(2)
+        assert delay == 2000  # 1000 * 2
+        
+        # Test retry count 3
+        delay = policy.compute_delay(3)
+        assert delay == 3000  # 1000 * 3
+
+    def test_fixed_strategy(self):
+        """Test fixed retry strategy"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.FIXED,
+            backoff_factor=1000
+        )
+        
+        # Test retry count 1
+        delay = policy.compute_delay(1)
+        assert delay == 1000  # Always 1000
+        
+        # Test retry count 2
+        delay = policy.compute_delay(2)
+        assert delay == 1000  # Always 1000
+        
+        # Test retry count 3
+        delay = policy.compute_delay(3)
+        assert delay == 1000  # Always 1000
+
+    def test_exponential_full_jitter_strategy(self):
+        """Test exponential full jitter retry strategy"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.EXPONENTIAL_FULL_JITTER,
+            backoff_factor=1000,
+            exponent=2
+        )
+        
+        # Test retry count 1
+        delay = policy.compute_delay(1)
+        assert 0 <= delay <= 1000  # Random between 0 and 1000
+        
+        # Test retry count 2
+        delay = policy.compute_delay(2)
+        assert 0 <= delay <= 2000  # Random between 0 and 2000
+
+    def test_exponential_equal_jitter_strategy(self):
+        """Test exponential equal jitter retry strategy"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.EXPONENTIAL_EQUAL_JITTER,
+            backoff_factor=1000,
+            exponent=2
+        )
+        
+        # Test retry count 1
+        delay = policy.compute_delay(1)
+        assert 500 <= delay <= 1000  # Random between 500 and 1000
+        
+        # Test retry count 2
+        delay = policy.compute_delay(2)
+        assert 1000 <= delay <= 2000  # Random between 1000 and 2000
+
+    def test_linear_full_jitter_strategy(self):
+        """Test linear full jitter retry strategy"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.LINEAR_FULL_JITTER,
+            backoff_factor=1000
+        )
+        
+        # Test retry count 1
+        delay = policy.compute_delay(1)
+        assert 0 <= delay <= 1000  # Random between 0 and 1000
+        
+        # Test retry count 2
+        delay = policy.compute_delay(2)
+        assert 0 <= delay <= 2000  # Random between 0 and 2000
+
+    def test_linear_equal_jitter_strategy(self):
+        """Test linear equal jitter retry strategy"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.LINEAR_EQUAL_JITTER,
+            backoff_factor=1000
+        )
+        
+        # Test retry count 1
+        delay = policy.compute_delay(1)
+        assert 500 <= delay <= 1000  # Random between 500 and 1000
+        
+        # Test retry count 2
+        delay = policy.compute_delay(2)
+        assert 1000 <= delay <= 2000  # Random between 1000 and 2000
+
+    def test_fixed_full_jitter_strategy(self):
+        """Test fixed full jitter retry strategy"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.FIXED_FULL_JITTER,
+            backoff_factor=1000
+        )
+        
+        # Test retry count 1
+        delay = policy.compute_delay(1)
+        assert 0 <= delay <= 1000  # Random between 0 and 1000
+        
+        # Test retry count 2
+        delay = policy.compute_delay(2)
+        assert 0 <= delay <= 1000  # Random between 0 and 1000
+
+    def test_fixed_equal_jitter_strategy(self):
+        """Test fixed equal jitter retry strategy"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.FIXED_EQUAL_JITTER,
+            backoff_factor=1000
+        )
+        
+        # Test retry count 1
+        delay = policy.compute_delay(1)
+        assert 500 <= delay <= 1000  # Random between 500 and 1000
+        
+        # Test retry count 2
+        delay = policy.compute_delay(2)
+        assert 500 <= delay <= 1000  # Random between 500 and 1000
+
+    def test_invalid_retry_count(self):
+        """Test that invalid retry count raises ValueError"""
+        policy = RetryPolicyModel()
+        
+        # Test retry count 0
+        with pytest.raises(ValueError, match="Retry count must be greater than or equal to 1"):
+            policy.compute_delay(0)
+        
+        # Test retry count -1
+        with pytest.raises(ValueError, match="Retry count must be greater than or equal to 1"):
+            policy.compute_delay(-1)
+
+    def test_max_delay_capping(self):
+        """Test that max_delay properly caps the delay"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.EXPONENTIAL,
+            backoff_factor=1000,
+            exponent=2,
+            max_delay=1500
+        )
+        
+        # Test retry count 1
+        delay = policy.compute_delay(1)
+        assert delay == 1000  # Not capped
+        
+        # Test retry count 2
+        delay = policy.compute_delay(2)
+        assert delay == 1500  # Capped at max_delay
+        
+        # Test retry count 3
+        delay = policy.compute_delay(3)
+        assert delay == 1500  # Capped at max_delay
+
+    def test_jitter_strategies_with_max_delay(self):
+        """Test jitter strategies with max delay capping"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.EXPONENTIAL_FULL_JITTER,
+            backoff_factor=1000,
+            exponent=2,
+            max_delay=1500
+        )
+        
+        # Test multiple calls to ensure max_delay is respected
+        for _ in range(10):
+            delay = policy.compute_delay(3)
+            assert delay <= 1500  # Should never exceed max_delay
+
+    def test_different_exponents(self):
+        """Test different exponent values"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.EXPONENTIAL,
+            backoff_factor=1000,
+            exponent=3
+        )
+        
+        # Test retry count 1
+        delay = policy.compute_delay(1)
+        assert delay == 1000  # 1000 * 3^0
+        
+        # Test retry count 2
+        delay = policy.compute_delay(2)
+        assert delay == 3000  # 1000 * 3^1
+        
+        # Test retry count 3
+        delay = policy.compute_delay(3)
+        assert delay == 9000  # 1000 * 3^2
+
+    def test_different_backoff_factors(self):
+        """Test different backoff factor values"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.LINEAR,
+            backoff_factor=500
+        )
+        
+        # Test retry count 1
+        delay = policy.compute_delay(1)
+        assert delay == 500  # 500 * 1
+        
+        # Test retry count 2
+        delay = policy.compute_delay(2)
+        assert delay == 1000  # 500 * 2
+
+    def test_model_validation(self):
+        """Test Pydantic model validation"""
+        # Test valid model
+        RetryPolicyModel(
+            max_retries=5,
+            strategy=RetryStrategy.EXPONENTIAL,
+            backoff_factor=1000,
+            exponent=2,
+            max_delay=10000
+        )
+        
+        # Test invalid max_retries (negative)
+        with pytest.raises(ValueError):
+            RetryPolicyModel(max_retries=-1)
+        
+        # Test invalid backoff_factor (non-positive)
+        with pytest.raises(ValueError):
+            RetryPolicyModel(backoff_factor=0)
+        
+        # Test invalid exponent (non-positive)
+        with pytest.raises(ValueError):
+            RetryPolicyModel(exponent=0)
+        
+        # Test invalid max_delay (non-positive)
+        with pytest.raises(ValueError):
+            RetryPolicyModel(max_delay=0)
+
+    def test_strategy_enum_values(self):
+        """Test all RetryStrategy enum values"""
+        strategies = [
+            RetryStrategy.EXPONENTIAL,
+            RetryStrategy.EXPONENTIAL_FULL_JITTER,
+            RetryStrategy.EXPONENTIAL_EQUAL_JITTER,
+            RetryStrategy.LINEAR,
+            RetryStrategy.LINEAR_FULL_JITTER,
+            RetryStrategy.LINEAR_EQUAL_JITTER,
+            RetryStrategy.FIXED,
+            RetryStrategy.FIXED_FULL_JITTER,
+            RetryStrategy.FIXED_EQUAL_JITTER
+        ]
+        
+        for strategy in strategies:
+            policy = RetryPolicyModel(strategy=strategy)
+            assert policy.strategy == strategy
+            # Should not raise any exceptions
+            delay = policy.compute_delay(1)
+            assert isinstance(delay, int)
+            assert delay >= 0
+
+    def test_edge_case_large_numbers(self):
+        """Test edge cases with large numbers"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.EXPONENTIAL,
+            backoff_factor=1000000,
+            exponent=10
+        )
+        
+        # Test that large numbers don't cause overflow
+        delay = policy.compute_delay(3)
+        assert isinstance(delay, int)
+        assert delay > 0
+
+    def test_consistency_across_calls(self):
+        """Test that non-jitter strategies are consistent"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.EXPONENTIAL,
+            backoff_factor=1000,
+            exponent=2
+        )
+        
+        # Multiple calls should return the same result for non-jitter strategies
+        delay1 = policy.compute_delay(2)
+        delay2 = policy.compute_delay(2)
+        assert delay1 == delay2
+
+    def test_jitter_variability(self):
+        """Test that jitter strategies produce different results"""
+        policy = RetryPolicyModel(
+            strategy=RetryStrategy.EXPONENTIAL_FULL_JITTER,
+            backoff_factor=1000,
+            exponent=2
+        )
+        
+        # Multiple calls should return different results for jitter strategies
+        delays = set()
+        for _ in range(100):
+            delay = policy.compute_delay(2)
+            delays.add(delay)
+        
+        # Should have multiple different values (not all the same)
+        assert len(delays) > 1 
\ No newline at end of file