Commit 04834d2

feat(api)!: fixes to remove deprecated inference resources
1 parent 7f24c43 · commit 04834d2

15 files changed: +308 -1230 lines

.stats.yml

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 configured_endpoints: 105
 openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-adcfaad1990d45e42b20e200a9ecc35ee32df5692bd9cd18ae898b0b7728c919.yml
 openapi_spec_hash: 4f532287bafe5da0578a1c1a5e31c952
-config_hash: 7ec5a583f9c26b38993013bdfb0e7d46
+config_hash: 5b643c97c83a497d7d346253f1e175f3

README.md

Lines changed: 11 additions & 32 deletions
@@ -127,17 +127,11 @@ from llama_stack_client import LlamaStackClient
 
 client = LlamaStackClient()
 
-chat_completion_response = client.inference.chat_completion(
-    messages=[
-        {
-            "content": "string",
-            "role": "user",
-        }
-    ],
-    model_id="model_id",
-    logprobs={},
+client.toolgroups.register(
+    provider_id="provider_id",
+    toolgroup_id="toolgroup_id",
+    mcp_endpoint={"uri": "uri"},
 )
-print(chat_completion_response.logprobs)
 ```
 
 ## File uploads
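The new quick-start snippet above is only a fragment of the README; a self-contained version might look like the sketch below. The `base_url` and the follow-up `list()` call are assumptions added for illustration, while the placeholder argument values come straight from the diff.

```python
from llama_stack_client import LlamaStackClient

# base_url is an assumption for illustration; the diff does not show it.
client = LlamaStackClient(base_url="http://localhost:8321")

# Register a tool group backed by an MCP endpoint (placeholder values from the diff).
client.toolgroups.register(
    provider_id="provider_id",
    toolgroup_id="toolgroup_id",
    mcp_endpoint={"uri": "uri"},
)

# Listing tool groups afterwards is an assumed way to confirm the registration.
print(client.toolgroups.list())
```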
@@ -173,10 +167,7 @@ from llama_stack_client import LlamaStackClient
 client = LlamaStackClient()
 
 try:
-    client.agents.sessions.create(
-        agent_id="agent_id",
-        session_name="session_name",
-    )
+    client.agents.toolgroups.list()
 except llama_stack_client.APIConnectionError as e:
     print("The server could not be reached")
     print(e.__cause__)  # an underlying Exception, likely raised within httpx.
@@ -219,10 +210,7 @@ client = LlamaStackClient(
 )
 
 # Or, configure per-request:
-client.with_options(max_retries=5).agents.sessions.create(
-    agent_id="agent_id",
-    session_name="session_name",
-)
+client.with_options(max_retries=5).toolgroups.list.create()
 ```
 
 ### Timeouts
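The replacement line above chains `.toolgroups.list.create()`, which does not match the `toolgroups.list()` call used elsewhere in this commit; assuming `.list()` is the intended method, a per-request retry override would read roughly as follows (the retry counts are illustrative).

```python
from llama_stack_client import LlamaStackClient

# Assumed default configuration: disable retries for every request on this client.
client = LlamaStackClient(max_retries=0)

# Per-request override; .list() is an assumption in place of the
# .list.create() chain shown in the diff.
toolgroups = client.with_options(max_retries=5).toolgroups.list()
print(toolgroups)
```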
@@ -245,10 +233,7 @@ client = LlamaStackClient(
 )
 
 # Override per-request:
-client.with_options(timeout=5.0).agents.sessions.create(
-    agent_id="agent_id",
-    session_name="session_name",
-)
+client.with_options(timeout=5.0).toolgroups.list.create()
 ```
 
 On timeout, an `APITimeoutError` is thrown.
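The per-request timeout example keeps the same `.list.create()` chain; under the same assumption, here is a sketch that also catches the `APITimeoutError` mentioned in the surrounding README text.

```python
import llama_stack_client
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

try:
    # Override the timeout (in seconds) for this request only;
    # .list() is assumed, as in the retries example above.
    client.with_options(timeout=5.0).toolgroups.list()
except llama_stack_client.APITimeoutError:
    print("The request timed out")
```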
@@ -287,14 +272,11 @@ The "raw" Response object can be accessed by prefixing `.with_raw_response.` to
 from llama_stack_client import LlamaStackClient
 
 client = LlamaStackClient()
-response = client.agents.sessions.with_raw_response.create(
-    agent_id="agent_id",
-    session_name="session_name",
-)
+response = client.toolgroups.with_raw_response.list()
 print(response.headers.get('X-My-Header'))
 
-session = response.parse()  # get the object that `agents.sessions.create()` would have returned
-print(session.session_id)
+toolgroup = response.parse()  # get the object that `toolgroups.list()` would have returned
+print(toolgroup)
 ```
 
 These methods return an [`APIResponse`](https://github.com/meta-llama/llama-stack-python/tree/main/src/llama_stack_client/_response.py) object.
@@ -308,10 +290,7 @@ The above interface eagerly reads the full response body when you make the reque
 To stream the response body, use `.with_streaming_response` instead, which requires a context manager and only reads the response body once you call `.read()`, `.text()`, `.json()`, `.iter_bytes()`, `.iter_text()`, `.iter_lines()` or `.parse()`. In the async client, these are async methods.
 
 ```python
-with client.agents.sessions.with_streaming_response.create(
-    agent_id="agent_id",
-    session_name="session_name",
-) as response:
+with client.agents.toolgroups.with_streaming_response.list() as response:
     print(response.headers.get("X-My-Header"))
 
     for line in response.iter_lines():
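The streaming example is truncated at the `iter_lines()` loop, and its new call goes through `client.agents.toolgroups`, whereas the raw-response hunk above uses `client.toolgroups`; below is a complete sketch assuming the latter resource path is the intended one.

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

# Nothing is read from the body until .iter_lines()/.parse() is called.
# client.toolgroups (without the agents prefix) is an assumption based on
# the raw-response example earlier in this commit.
with client.toolgroups.with_streaming_response.list() as response:
    print(response.headers.get("X-My-Header"))

    for line in response.iter_lines():
        print(line)
```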

api.md

Lines changed: 2 additions & 2 deletions
@@ -241,12 +241,12 @@ Methods:
 Types:
 
 ```python
-from llama_stack_client.types import ChatCompletionResponseStreamChunk, TokenLogProbs
+from llama_stack_client.types import InferenceRerankResponse
 ```
 
 Methods:
 
-- <code title="post /v1/inference/chat-completion">client.inference.<a href="./src/llama_stack_client/resources/inference.py">chat_completion</a>(\*\*<a href="src/llama_stack_client/types/inference_chat_completion_params.py">params</a>) -> <a href="./src/llama_stack_client/types/shared/chat_completion_response.py">ChatCompletionResponse</a></code>
+- <code title="post /v1alpha/inference/rerank">client.inference.<a href="./src/llama_stack_client/resources/inference.py">rerank</a>(\*\*<a href="src/llama_stack_client/types/inference_rerank_params.py">params</a>) -> <a href="./src/llama_stack_client/types/inference_rerank_response.py">InferenceRerankResponse</a></code>
 
 # Embeddings
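api.md now documents a single `inference.rerank` method in place of `chat_completion`. The diff only shows the route and return type, so the parameter names in the sketch below are hypothetical; the generated `inference_rerank_params.py` would hold the real signature.

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

# Hypothetical call to the new rerank endpoint (POST /v1alpha/inference/rerank);
# the model, query, and items parameter names are assumptions, not taken from the spec.
response = client.inference.rerank(
    model="rerank-model-id",
    query="What is the capital of France?",
    items=["Paris is the capital of France.", "Berlin is in Germany."],
)
print(response)  # InferenceRerankResponse
```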
