
Commit d510ae0

feat(api): manual updates
force readme update
1 parent c5bc3ca commit d510ae0

File tree: .stats.yml · README.md · tests/test_client.py

3 files changed: +186 −38 lines

.stats.yml

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 configured_endpoints: 76
 openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/digitalocean%2Fgradientai-e8b3cbc80e18e4f7f277010349f25e1319156704f359911dc464cc21a0d077a6.yml
 openapi_spec_hash: c773d792724f5647ae25a5ae4ccec208
-config_hash: 651ae9b93d723d383facbf979fd97fee
+config_hash: 67ce33bbbf8698b50194d8da5fd009d6

README.md

Lines changed: 84 additions & 12 deletions
@@ -120,6 +120,50 @@ async def main() -> None:
 asyncio.run(main())
 ```
 
+## Streaming responses
+
+We provide support for streaming responses using server-sent events (SSE).
+
+```python
+from gradientai import GradientAI
+
+client = GradientAI()
+
+stream = client.chat.completions.create(
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ],
+    model="llama3.3-70b-instruct",
+    stream=True,
+)
+for completion in stream:
+    print(completion.choices)
+```
+
+The async client uses the exact same interface.
+
+```python
+from gradientai import AsyncGradientAI
+
+client = AsyncGradientAI()
+
+stream = await client.chat.completions.create(
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ],
+    model="llama3.3-70b-instruct",
+    stream=True,
+)
+async for completion in stream:
+    print(completion.choices)
+```
+
 ## Using types
 
 Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typing.html#typing.TypedDict). Responses are [Pydantic models](https://docs.pydantic.dev) which also provide helper methods for things like:
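Note on the new streaming example above: a minimal sketch of assembling the streamed reply into a single string. It assumes the chunks follow the OpenAI-compatible shape where incremental text arrives on `choices[0].delta.content`; that attribute is an assumption for illustration and is not shown in this diff.

```python
from gradientai import GradientAI

client = GradientAI()

stream = client.chat.completions.create(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    model="llama3.3-70b-instruct",
    stream=True,
)

# Accumulate streamed text as it arrives; chunks without content are skipped.
parts: list[str] = []
for completion in stream:
    if completion.choices:
        delta = completion.choices[0].delta
        if getattr(delta, "content", None):
            parts.append(delta.content)
            print(delta.content, end="", flush=True)

print()
print("full reply:", "".join(parts))
```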
@@ -167,8 +211,14 @@ from gradientai import GradientAI
 client = GradientAI()
 
 try:
-    client.agents.versions.list(
-        uuid="REPLACE_ME",
+    client.chat.completions.create(
+        messages=[
+            {
+                "role": "user",
+                "content": "What is the capital of France?",
+            }
+        ],
+        model="llama3.3-70b-instruct",
     )
 except gradientai.APIConnectionError as e:
     print("The server could not be reached")
@@ -212,8 +262,14 @@ client = GradientAI(
 )
 
 # Or, configure per-request:
-client.with_options(max_retries=5).agents.versions.list(
-    uuid="REPLACE_ME",
+client.with_options(max_retries=5).chat.completions.create(
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ],
+    model="llama3.3-70b-instruct",
 )
 ```

@@ -237,8 +293,14 @@ client = GradientAI(
 )
 
 # Override per-request:
-client.with_options(timeout=5.0).agents.versions.list(
-    uuid="REPLACE_ME",
+client.with_options(timeout=5.0).chat.completions.create(
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ],
+    model="llama3.3-70b-instruct",
 )
 ```

@@ -280,13 +342,17 @@ The "raw" Response object can be accessed by prefixing `.with_raw_response.` to
 from gradientai import GradientAI
 
 client = GradientAI()
-response = client.agents.versions.with_raw_response.list(
-    uuid="REPLACE_ME",
+response = client.chat.completions.with_raw_response.create(
+    messages=[{
+        "role": "user",
+        "content": "What is the capital of France?",
+    }],
+    model="llama3.3-70b-instruct",
 )
 print(response.headers.get('X-My-Header'))
 
-version = response.parse()  # get the object that `agents.versions.list()` would have returned
-print(version.agent_versions)
+completion = response.parse()  # get the object that `chat.completions.create()` would have returned
+print(completion.choices)
 ```
 
 These methods return an [`APIResponse`](https://github.com/digitalocean/gradientai-python/tree/main/src/gradientai/_response.py) object.
@@ -300,8 +366,14 @@ The above interface eagerly reads the full response body when you make the reque
 To stream the response body, use `.with_streaming_response` instead, which requires a context manager and only reads the response body once you call `.read()`, `.text()`, `.json()`, `.iter_bytes()`, `.iter_text()`, `.iter_lines()` or `.parse()`. In the async client, these are async methods.
 
 ```python
-with client.agents.versions.with_streaming_response.list(
-    uuid="REPLACE_ME",
+with client.chat.completions.with_streaming_response.create(
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ],
+    model="llama3.3-70b-instruct",
 ) as response:
     print(response.headers.get("X-My-Header"))
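Note on the truncated `.with_streaming_response` example above: a minimal sketch of reading the streamed body, using only the `.iter_lines()` method already named in the prose; the rest mirrors the diff and is illustrative, not part of this commit.

```python
from gradientai import GradientAI

client = GradientAI()

with client.chat.completions.with_streaming_response.create(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    model="llama3.3-70b-instruct",
) as response:
    print(response.headers.get("X-My-Header"))

    # The body is only read once one of the read methods is called.
    for line in response.iter_lines():
        print(line)
```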

tests/test_client.py

Lines changed: 101 additions & 25 deletions
@@ -819,20 +819,36 @@ def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str
     @mock.patch("gradientai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
     @pytest.mark.respx(base_url=base_url)
     def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, client: GradientAI) -> None:
-        respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=httpx.TimeoutException("Test timeout error"))
+        respx_mock.post("/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error"))
 
         with pytest.raises(APITimeoutError):
-            client.agents.versions.with_streaming_response.list(uuid="uuid").__enter__()
+            client.chat.completions.with_streaming_response.create(
+                messages=[
+                    {
+                        "content": "string",
+                        "role": "system",
+                    }
+                ],
+                model="llama3-8b-instruct",
+            ).__enter__()
 
         assert _get_open_connections(self.client) == 0
 
     @mock.patch("gradientai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
     @pytest.mark.respx(base_url=base_url)
     def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, client: GradientAI) -> None:
-        respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(return_value=httpx.Response(500))
+        respx_mock.post("/chat/completions").mock(return_value=httpx.Response(500))
 
         with pytest.raises(APIStatusError):
-            client.agents.versions.with_streaming_response.list(uuid="uuid").__enter__()
+            client.chat.completions.with_streaming_response.create(
+                messages=[
+                    {
+                        "content": "string",
+                        "role": "system",
+                    }
+                ],
+                model="llama3-8b-instruct",
+            ).__enter__()
         assert _get_open_connections(self.client) == 0
 
     @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
@@ -859,9 +875,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
                 return httpx.Response(500)
             return httpx.Response(200)
 
-        respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler)
+        respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
 
-        response = client.agents.versions.with_raw_response.list(uuid="uuid")
+        response = client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "system",
+                }
+            ],
+            model="llama3-8b-instruct",
+        )
 
         assert response.retries_taken == failures_before_success
         assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success
@@ -883,10 +907,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
                 return httpx.Response(500)
             return httpx.Response(200)
 
-        respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler)
+        respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
 
-        response = client.agents.versions.with_raw_response.list(
-            uuid="uuid", extra_headers={"x-stainless-retry-count": Omit()}
+        response = client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "system",
+                }
+            ],
+            model="llama3-8b-instruct",
+            extra_headers={"x-stainless-retry-count": Omit()},
         )
 
         assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0
@@ -908,10 +939,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
                 return httpx.Response(500)
             return httpx.Response(200)
 
-        respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler)
+        respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
 
-        response = client.agents.versions.with_raw_response.list(
-            uuid="uuid", extra_headers={"x-stainless-retry-count": "42"}
+        response = client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "system",
+                }
+            ],
+            model="llama3-8b-instruct",
+            extra_headers={"x-stainless-retry-count": "42"},
         )
 
         assert response.http_request.headers.get("x-stainless-retry-count") == "42"
@@ -1734,10 +1772,18 @@ async def test_parse_retry_after_header(self, remaining_retries: int, retry_afte
     async def test_retrying_timeout_errors_doesnt_leak(
         self, respx_mock: MockRouter, async_client: AsyncGradientAI
     ) -> None:
-        respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=httpx.TimeoutException("Test timeout error"))
+        respx_mock.post("/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error"))
 
         with pytest.raises(APITimeoutError):
-            await async_client.agents.versions.with_streaming_response.list(uuid="uuid").__aenter__()
+            await async_client.chat.completions.with_streaming_response.create(
+                messages=[
+                    {
+                        "content": "string",
+                        "role": "system",
+                    }
+                ],
+                model="llama3-8b-instruct",
+            ).__aenter__()
 
         assert _get_open_connections(self.client) == 0
 
@@ -1746,10 +1792,18 @@ async def test_retrying_timeout_errors_doesnt_leak(
     async def test_retrying_status_errors_doesnt_leak(
         self, respx_mock: MockRouter, async_client: AsyncGradientAI
     ) -> None:
-        respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(return_value=httpx.Response(500))
+        respx_mock.post("/chat/completions").mock(return_value=httpx.Response(500))
 
         with pytest.raises(APIStatusError):
-            await async_client.agents.versions.with_streaming_response.list(uuid="uuid").__aenter__()
+            await async_client.chat.completions.with_streaming_response.create(
+                messages=[
+                    {
+                        "content": "string",
+                        "role": "system",
+                    }
+                ],
+                model="llama3-8b-instruct",
+            ).__aenter__()
         assert _get_open_connections(self.client) == 0
 
     @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
@@ -1777,9 +1831,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
                 return httpx.Response(500)
             return httpx.Response(200)
 
-        respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler)
+        respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
 
-        response = await client.agents.versions.with_raw_response.list(uuid="uuid")
+        response = await client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "system",
+                }
+            ],
+            model="llama3-8b-instruct",
+        )
 
         assert response.retries_taken == failures_before_success
         assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success
@@ -1802,10 +1864,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
                 return httpx.Response(500)
             return httpx.Response(200)
 
-        respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler)
+        respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
 
-        response = await client.agents.versions.with_raw_response.list(
-            uuid="uuid", extra_headers={"x-stainless-retry-count": Omit()}
+        response = await client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "system",
+                }
+            ],
+            model="llama3-8b-instruct",
+            extra_headers={"x-stainless-retry-count": Omit()},
        )
 
         assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0
@@ -1828,10 +1897,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
                 return httpx.Response(500)
             return httpx.Response(200)
 
-        respx_mock.get("/v2/gen-ai/agents/uuid/versions").mock(side_effect=retry_handler)
-
-        response = await client.agents.versions.with_raw_response.list(
-            uuid="uuid", extra_headers={"x-stainless-retry-count": "42"}
+        respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
+
+        response = await client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "system",
+                }
+            ],
+            model="llama3-8b-instruct",
+            extra_headers={"x-stainless-retry-count": "42"},
         )
 
         assert response.http_request.headers.get("x-stainless-retry-count") == "42"
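Note on the test changes above: a minimal, self-contained sketch of the retry-counting pattern these tests rely on, using only `httpx` and `respx` (both already used in the diff). The manual retry loop and the example base URL stand in for the SDK's built-in retry logic and are assumptions for illustration.

```python
import httpx
import respx

failures_before_success = 2
nb_retries = 0


def retry_handler(_request: httpx.Request) -> httpx.Response:
    # Fail with a 500 until the configured number of failures is reached.
    global nb_retries
    if nb_retries < failures_before_success:
        nb_retries += 1
        return httpx.Response(500)
    return httpx.Response(200)


with respx.mock(base_url="https://example.test") as respx_mock:
    respx_mock.post("/chat/completions").mock(side_effect=retry_handler)

    with httpx.Client(base_url="https://example.test") as http_client:
        # Stand-in for the SDK's automatic retries: retry on 5xx, up to 5 attempts.
        for _attempt in range(5):
            response = http_client.post(
                "/chat/completions",
                json={"model": "llama3-8b-instruct", "messages": [{"role": "system", "content": "string"}]},
            )
            if response.status_code < 500:
                break

assert response.status_code == 200
assert nb_retries == failures_before_success
```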
