Commit e9873f9

feat(api): manual updates

- delete /v1/openai/v1/responses/{response_id}
- post /v1/inference/rerank
- get /v1/openai/v1/models
- delete /v1/shields/{identifier}
1 parent a2f4544 commit e9873f9

20 files changed: +1045 −21 lines

.stats.yml

Lines changed: 2 additions & 2 deletions

```diff
@@ -1,4 +1,4 @@
-configured_endpoints: 107
+configured_endpoints: 111
 openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-f252873ea1e1f38fd207331ef2621c511154d5be3f4076e59cc15754fc58eee4.yml
 openapi_spec_hash: 10cbb4337a06a9fdd7d08612dd6044c3
-config_hash: 374d9711288576877a9fabb34e4da7b9
+config_hash: 0358112cc0f3d880b4d55debdbe1cfa3
```

api.md

Lines changed: 26 additions & 5 deletions

````diff
@@ -81,14 +81,20 @@ Methods:
 Types:
 
 ```python
-from llama_stack_client.types import ResponseObject, ResponseObjectStream, ResponseListResponse
+from llama_stack_client.types import (
+    ResponseObject,
+    ResponseObjectStream,
+    ResponseListResponse,
+    ResponseDeleteResponse,
+)
 ```
 
 Methods:
 
 - <code title="post /v1/openai/v1/responses">client.responses.<a href="./src/llama_stack_client/resources/responses/responses.py">create</a>(\*\*<a href="src/llama_stack_client/types/response_create_params.py">params</a>) -> <a href="./src/llama_stack_client/types/response_object.py">ResponseObject</a></code>
 - <code title="get /v1/openai/v1/responses/{response_id}">client.responses.<a href="./src/llama_stack_client/resources/responses/responses.py">retrieve</a>(response_id) -> <a href="./src/llama_stack_client/types/response_object.py">ResponseObject</a></code>
 - <code title="get /v1/openai/v1/responses">client.responses.<a href="./src/llama_stack_client/resources/responses/responses.py">list</a>(\*\*<a href="src/llama_stack_client/types/response_list_params.py">params</a>) -> <a href="./src/llama_stack_client/types/response_list_response.py">SyncOpenAICursorPage[ResponseListResponse]</a></code>
+- <code title="delete /v1/openai/v1/responses/{response_id}">client.responses.<a href="./src/llama_stack_client/resources/responses/responses.py">delete</a>(response_id) -> <a href="./src/llama_stack_client/types/response_delete_response.py">ResponseDeleteResponse</a></code>
 
 ## InputItems
````
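A minimal usage sketch for the new delete method; the base URL and response ID below are placeholders, not values taken from this diff:

```python
from llama_stack_client import LlamaStackClient

# Placeholder base URL; point at your own Llama Stack deployment.
client = LlamaStackClient(base_url="http://localhost:8321")

# Delete a previously created response by its ID (placeholder ID).
deleted = client.responses.delete("resp_abc123")
print(deleted)  # ResponseDeleteResponse
```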

````diff
@@ -242,6 +248,7 @@ from llama_stack_client.types import (
     EmbeddingsResponse,
     TokenLogProbs,
     InferenceBatchChatCompletionResponse,
+    InferenceRerankResponse,
 )
 ```
 
@@ -252,6 +259,7 @@ Methods:
 - <code title="post /v1/inference/chat-completion">client.inference.<a href="./src/llama_stack_client/resources/inference.py">chat_completion</a>(\*\*<a href="src/llama_stack_client/types/inference_chat_completion_params.py">params</a>) -> <a href="./src/llama_stack_client/types/shared/chat_completion_response.py">ChatCompletionResponse</a></code>
 - <code title="post /v1/inference/completion">client.inference.<a href="./src/llama_stack_client/resources/inference.py">completion</a>(\*\*<a href="src/llama_stack_client/types/inference_completion_params.py">params</a>) -> <a href="./src/llama_stack_client/types/completion_response.py">CompletionResponse</a></code>
 - <code title="post /v1/inference/embeddings">client.inference.<a href="./src/llama_stack_client/resources/inference.py">embeddings</a>(\*\*<a href="src/llama_stack_client/types/inference_embeddings_params.py">params</a>) -> <a href="./src/llama_stack_client/types/embeddings_response.py">EmbeddingsResponse</a></code>
+- <code title="post /v1/inference/rerank">client.inference.<a href="./src/llama_stack_client/resources/inference.py">rerank</a>(\*\*<a href="src/llama_stack_client/types/inference_rerank_params.py">params</a>) -> <a href="./src/llama_stack_client/types/inference_rerank_response.py">InferenceRerankResponse</a></code>
 
 # Embeddings
 
````
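A hedged sketch of calling the new rerank endpoint; the model identifier and documents are placeholders, and per the method docstring each item may be a string or a content part:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# Rank a few documents against a query. The model ID is a placeholder and
# must name a reranking model registered with your stack.
ranked = client.inference.rerank(
    model="example/reranker-model",
    query="What is the capital of France?",
    items=[
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
        "The Eiffel Tower is in Paris.",
    ],
    max_num_results=2,  # optional; by default all items are returned
)
print(ranked)  # InferenceRerankResponse
```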

````diff
@@ -389,10 +397,22 @@ from llama_stack_client.types import ListModelsResponse, Model, ModelListRespons
 
 Methods:
 
-- <code title="get /v1/models/{model_id}">client.models.<a href="./src/llama_stack_client/resources/models.py">retrieve</a>(model_id) -> <a href="./src/llama_stack_client/types/model.py">Model</a></code>
-- <code title="get /v1/models">client.models.<a href="./src/llama_stack_client/resources/models.py">list</a>() -> <a href="./src/llama_stack_client/types/model_list_response.py">ModelListResponse</a></code>
-- <code title="post /v1/models">client.models.<a href="./src/llama_stack_client/resources/models.py">register</a>(\*\*<a href="src/llama_stack_client/types/model_register_params.py">params</a>) -> <a href="./src/llama_stack_client/types/model.py">Model</a></code>
-- <code title="delete /v1/models/{model_id}">client.models.<a href="./src/llama_stack_client/resources/models.py">unregister</a>(model_id) -> None</code>
+- <code title="get /v1/models/{model_id}">client.models.<a href="./src/llama_stack_client/resources/models/models.py">retrieve</a>(model_id) -> <a href="./src/llama_stack_client/types/model.py">Model</a></code>
+- <code title="get /v1/models">client.models.<a href="./src/llama_stack_client/resources/models/models.py">list</a>() -> <a href="./src/llama_stack_client/types/model_list_response.py">ModelListResponse</a></code>
+- <code title="post /v1/models">client.models.<a href="./src/llama_stack_client/resources/models/models.py">register</a>(\*\*<a href="src/llama_stack_client/types/model_register_params.py">params</a>) -> <a href="./src/llama_stack_client/types/model.py">Model</a></code>
+- <code title="delete /v1/models/{model_id}">client.models.<a href="./src/llama_stack_client/resources/models/models.py">unregister</a>(model_id) -> None</code>
+
+## OpenAI
+
+Types:
+
+```python
+from llama_stack_client.types.models import OpenAIListResponse
+```
+
+Methods:
+
+- <code title="get /v1/openai/v1/models">client.models.openai.<a href="./src/llama_stack_client/resources/models/openai.py">list</a>() -> <a href="./src/llama_stack_client/types/models/openai_list_response.py">OpenAIListResponse</a></code>
 
 # PostTraining
````
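The OpenAI-compatible model listing lives on a new sub-resource; a minimal sketch, again with a placeholder base URL:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# List models in the OpenAI-compatible format, as opposed to the
# native listing at client.models.list().
openai_models = client.models.openai.list()
print(openai_models)  # OpenAIListResponse
```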

```diff
@@ -487,6 +507,7 @@ Methods:
 
 - <code title="get /v1/shields/{identifier}">client.shields.<a href="./src/llama_stack_client/resources/shields.py">retrieve</a>(identifier) -> <a href="./src/llama_stack_client/types/shield.py">Shield</a></code>
 - <code title="get /v1/shields">client.shields.<a href="./src/llama_stack_client/resources/shields.py">list</a>() -> <a href="./src/llama_stack_client/types/shield_list_response.py">ShieldListResponse</a></code>
+- <code title="delete /v1/shields/{identifier}">client.shields.<a href="./src/llama_stack_client/resources/shields.py">delete</a>(identifier) -> None</code>
 - <code title="post /v1/shields">client.shields.<a href="./src/llama_stack_client/resources/shields.py">register</a>(\*\*<a href="src/llama_stack_client/types/shield_register_params.py">params</a>) -> <a href="./src/llama_stack_client/types/shield.py">Shield</a></code>
 
 # SyntheticDataGeneration
```
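And a short sketch of the new shield deletion; the identifier is a placeholder, and per the signature above the method returns None:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# Unregister a shield by identifier (placeholder). The endpoint returns
# no body, so the SDK method returns None.
client.shields.delete("my-safety-shield")
```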

src/llama_stack_client/_client.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -64,7 +64,6 @@
 )
 from .resources.files import FilesResource, AsyncFilesResource
 from .resources.tools import ToolsResource, AsyncToolsResource
-from .resources.models import ModelsResource, AsyncModelsResource
 from .resources.routes import RoutesResource, AsyncRoutesResource
 from .resources.safety import SafetyResource, AsyncSafetyResource
 from .resources.inspect import InspectResource, AsyncInspectResource
@@ -84,6 +83,7 @@
 from .resources.completions import CompletionsResource, AsyncCompletionsResource
 from .resources.moderations import ModerationsResource, AsyncModerationsResource
 from .resources.agents.agents import AgentsResource, AsyncAgentsResource
+from .resources.models.models import ModelsResource, AsyncModelsResource
 from .resources.scoring_functions import ScoringFunctionsResource, AsyncScoringFunctionsResource
 from .resources.responses.responses import ResponsesResource, AsyncResponsesResource
 from .resources.synthetic_data_generation import (
```

src/llama_stack_client/resources/inference.py

Lines changed: 132 additions & 1 deletion

```diff
@@ -3,12 +3,13 @@
 from __future__ import annotations
 
 import typing_extensions
-from typing import Union, Iterable
+from typing import Type, Union, Iterable, cast
 from typing_extensions import Literal, overload
 
 import httpx
 
 from ..types import (
+    inference_rerank_params,
     inference_completion_params,
     inference_embeddings_params,
     inference_chat_completion_params,
@@ -25,12 +26,14 @@
     async_to_raw_response_wrapper,
     async_to_streamed_response_wrapper,
 )
+from .._wrappers import DataWrapper
 from .._streaming import Stream, AsyncStream
 from .._base_client import make_request_options
 from ..types.completion_response import CompletionResponse
 from ..types.embeddings_response import EmbeddingsResponse
 from ..types.shared_params.message import Message
 from ..types.shared.batch_completion import BatchCompletion
+from ..types.inference_rerank_response import InferenceRerankResponse
 from ..types.shared_params.response_format import ResponseFormat
 from ..types.shared_params.sampling_params import SamplingParams
 from ..types.shared.chat_completion_response import ChatCompletionResponse
```
```diff
@@ -696,6 +699,64 @@ def embeddings(
             cast_to=EmbeddingsResponse,
         )
 
+    def rerank(
+        self,
+        *,
+        items: SequenceNotStr[inference_rerank_params.Item],
+        model: str,
+        query: inference_rerank_params.Query,
+        max_num_results: int | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> InferenceRerankResponse:
+        """
+        Rerank a list of documents based on their relevance to a query.
+
+        Args:
+          items: List of items to rerank. Each item can be a string, text content part, or image
+              content part. Each input must not exceed the model's max input token length.
+
+          model: The identifier of the reranking model to use.
+
+          query: The search query to rank items against. Can be a string, text content part, or
+              image content part. The input must not exceed the model's max input token
+              length.
+
+          max_num_results: (Optional) Maximum number of results to return. Default: returns all.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/v1/inference/rerank",
+            body=maybe_transform(
+                {
+                    "items": items,
+                    "model": model,
+                    "query": query,
+                    "max_num_results": max_num_results,
+                },
+                inference_rerank_params.InferenceRerankParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=DataWrapper[InferenceRerankResponse]._unwrapper,
+            ),
+            cast_to=cast(Type[InferenceRerankResponse], DataWrapper[InferenceRerankResponse]),
+        )
+
 
 class AsyncInferenceResource(AsyncAPIResource):
     @cached_property
```
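Worth noting: the post_parser above strips a data envelope before the value is returned. A simplified, self-contained sketch of that pattern, assuming the standard Stainless DataWrapper behavior rather than quoting this repository's `_wrappers.py`:

```python
from typing import Generic, List, TypeVar

T = TypeVar("T")


class DataWrapper(Generic[T]):
    """Simplified stand-in for llama_stack_client._wrappers.DataWrapper (assumption)."""

    def __init__(self, data: T) -> None:
        self.data = data

    @staticmethod
    def _unwrapper(wrapped: "DataWrapper[T]") -> T:
        # post_parser hook: strip the {"data": ...} envelope so rerank()
        # hands callers the inner results directly.
        return wrapped.data


# The server responds with {"data": [...]}; callers see only the payload.
envelope = DataWrapper[List[str]](data=["result-1", "result-2"])
assert DataWrapper._unwrapper(envelope) == ["result-1", "result-2"]
```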
```diff
@@ -1351,6 +1412,64 @@ async def embeddings(
             cast_to=EmbeddingsResponse,
         )
 
+    async def rerank(
+        self,
+        *,
+        items: SequenceNotStr[inference_rerank_params.Item],
+        model: str,
+        query: inference_rerank_params.Query,
+        max_num_results: int | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> InferenceRerankResponse:
+        """
+        Rerank a list of documents based on their relevance to a query.
+
+        Args:
+          items: List of items to rerank. Each item can be a string, text content part, or image
+              content part. Each input must not exceed the model's max input token length.
+
+          model: The identifier of the reranking model to use.
+
+          query: The search query to rank items against. Can be a string, text content part, or
+              image content part. The input must not exceed the model's max input token
+              length.
+
+          max_num_results: (Optional) Maximum number of results to return. Default: returns all.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/v1/inference/rerank",
+            body=await async_maybe_transform(
+                {
+                    "items": items,
+                    "model": model,
+                    "query": query,
+                    "max_num_results": max_num_results,
+                },
+                inference_rerank_params.InferenceRerankParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=DataWrapper[InferenceRerankResponse]._unwrapper,
+            ),
+            cast_to=cast(Type[InferenceRerankResponse], DataWrapper[InferenceRerankResponse]),
+        )
+
 
 class InferenceResourceWithRawResponse:
     def __init__(self, inference: InferenceResource) -> None:
```
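The async variant mirrors the sync one; a minimal sketch using AsyncLlamaStackClient, with placeholder URL, model, and documents:

```python
import asyncio

from llama_stack_client import AsyncLlamaStackClient


async def main() -> None:
    client = AsyncLlamaStackClient(base_url="http://localhost:8321")  # placeholder
    ranked = await client.inference.rerank(
        model="example/reranker-model",  # placeholder reranking model
        query="best pizza in town",
        items=["Pizza place A review", "Hardware store hours"],
    )
    print(ranked)


asyncio.run(main())
```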
```diff
@@ -1377,6 +1496,9 @@ def __init__(self, inference: InferenceResource) -> None:
                 inference.embeddings,  # pyright: ignore[reportDeprecated],
             )
         )
+        self.rerank = to_raw_response_wrapper(
+            inference.rerank,
+        )
 
 
 class AsyncInferenceResourceWithRawResponse:
@@ -1404,6 +1526,9 @@ def __init__(self, inference: AsyncInferenceResource) -> None:
                 inference.embeddings,  # pyright: ignore[reportDeprecated],
             )
         )
+        self.rerank = async_to_raw_response_wrapper(
+            inference.rerank,
+        )
 
 
 class InferenceResourceWithStreamingResponse:
@@ -1431,6 +1556,9 @@ def __init__(self, inference: InferenceResource) -> None:
                 inference.embeddings,  # pyright: ignore[reportDeprecated],
             )
         )
+        self.rerank = to_streamed_response_wrapper(
+            inference.rerank,
+        )
 
 
 class AsyncInferenceResourceWithStreamingResponse:
@@ -1458,3 +1586,6 @@ def __init__(self, inference: AsyncInferenceResource) -> None:
                 inference.embeddings,  # pyright: ignore[reportDeprecated],
             )
         )
+        self.rerank = async_to_streamed_response_wrapper(
+            inference.rerank,
+        )
```
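These wrapper registrations are what surface rerank on the raw and streaming response accessors. A hedged sketch of the raw-response path, assuming the standard Stainless with_raw_response surface (not shown in this diff):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# Access the raw HTTP response alongside the parsed result.
response = client.inference.with_raw_response.rerank(
    model="example/reranker-model",  # placeholder
    query="example query",
    items=["doc one", "doc two"],
)
print(response.headers)    # raw httpx headers
ranked = response.parse()  # InferenceRerankResponse
```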
src/llama_stack_client/resources/models/__init__.py

Lines changed: 33 additions & 0 deletions

```diff
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .models import (
+    ModelsResource,
+    AsyncModelsResource,
+    ModelsResourceWithRawResponse,
+    AsyncModelsResourceWithRawResponse,
+    ModelsResourceWithStreamingResponse,
+    AsyncModelsResourceWithStreamingResponse,
+)
+from .openai import (
+    OpenAIResource,
+    AsyncOpenAIResource,
+    OpenAIResourceWithRawResponse,
+    AsyncOpenAIResourceWithRawResponse,
+    OpenAIResourceWithStreamingResponse,
+    AsyncOpenAIResourceWithStreamingResponse,
+)
+
+__all__ = [
+    "OpenAIResource",
+    "AsyncOpenAIResource",
+    "OpenAIResourceWithRawResponse",
+    "AsyncOpenAIResourceWithRawResponse",
+    "OpenAIResourceWithStreamingResponse",
+    "AsyncOpenAIResourceWithStreamingResponse",
+    "ModelsResource",
+    "AsyncModelsResource",
+    "ModelsResourceWithRawResponse",
+    "AsyncModelsResourceWithRawResponse",
+    "ModelsResourceWithStreamingResponse",
+    "AsyncModelsResourceWithStreamingResponse",
+]
```
