3
3
from __future__ import annotations
4
4
5
5
import typing_extensions
6
- from typing import Union , Iterable
6
+ from typing import Type , Union , Iterable , cast
7
7
from typing_extensions import Literal , overload
8
8
9
9
import httpx
10
10
11
11
from ..types import (
12
+ inference_rerank_params ,
12
13
inference_completion_params ,
13
14
inference_embeddings_params ,
14
15
inference_chat_completion_params ,
25
26
async_to_raw_response_wrapper ,
26
27
async_to_streamed_response_wrapper ,
27
28
)
29
+ from .._wrappers import DataWrapper
28
30
from .._streaming import Stream , AsyncStream
29
31
from .._base_client import make_request_options
30
32
from ..types .completion_response import CompletionResponse
31
33
from ..types .embeddings_response import EmbeddingsResponse
32
34
from ..types .shared_params .message import Message
33
35
from ..types .shared .batch_completion import BatchCompletion
36
+ from ..types .inference_rerank_response import InferenceRerankResponse
34
37
from ..types .shared_params .response_format import ResponseFormat
35
38
from ..types .shared_params .sampling_params import SamplingParams
36
39
from ..types .shared .chat_completion_response import ChatCompletionResponse
@@ -696,6 +699,64 @@ def embeddings(
696
699
cast_to = EmbeddingsResponse ,
697
700
)
698
701
702
def rerank(
    self,
    *,
    items: SequenceNotStr[inference_rerank_params.Item],
    model: str,
    query: inference_rerank_params.Query,
    max_num_results: int | Omit = omit,
    # The arguments below are an escape hatch for sending parameters that have no dedicated keyword.
    # Values supplied here win over values configured on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> InferenceRerankResponse:
    """
    Rerank a list of documents based on their relevance to a query.

    Args:
      items: The items to rerank. An item may be a string, a text content part, or an
          image content part, and must not exceed the model's max input token length.

      model: Identifier of the reranking model to use.

      query: The search query the items are ranked against. May be a string, a text
          content part, or an image content part, and must not exceed the model's max
          input token length.

      max_num_results: (Optional) Upper bound on the number of results returned. When omitted,
          all results are returned.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    # Build the JSON payload from the keyword arguments via the params schema.
    payload = maybe_transform(
        {
            "items": items,
            "model": model,
            "query": query,
            "max_num_results": max_num_results,
        },
        inference_rerank_params.InferenceRerankParams,
    )

    # The response is parsed as a DataWrapper and then unwrapped by the post-parser.
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
        post_parser=DataWrapper[InferenceRerankResponse]._unwrapper,
    )

    # The cast only satisfies the type checker: the wire type is the wrapper,
    # while callers receive the unwrapped InferenceRerankResponse.
    response_type = cast(Type[InferenceRerankResponse], DataWrapper[InferenceRerankResponse])

    return self._post(
        "/v1/inference/rerank",
        body=payload,
        options=request_options,
        cast_to=response_type,
    )
759
+
699
760
700
761
class AsyncInferenceResource (AsyncAPIResource ):
701
762
@cached_property
@@ -1351,6 +1412,64 @@ async def embeddings(
1351
1412
cast_to = EmbeddingsResponse ,
1352
1413
)
1353
1414
1415
async def rerank(
    self,
    *,
    items: SequenceNotStr[inference_rerank_params.Item],
    model: str,
    query: inference_rerank_params.Query,
    max_num_results: int | Omit = omit,
    # The arguments below are an escape hatch for sending parameters that have no dedicated keyword.
    # Values supplied here win over values configured on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> InferenceRerankResponse:
    """
    Rerank a list of documents based on their relevance to a query.

    Args:
      items: The items to rerank. An item may be a string, a text content part, or an
          image content part, and must not exceed the model's max input token length.

      model: Identifier of the reranking model to use.

      query: The search query the items are ranked against. May be a string, a text
          content part, or an image content part, and must not exceed the model's max
          input token length.

      max_num_results: (Optional) Upper bound on the number of results returned. When omitted,
          all results are returned.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    # Build the JSON payload from the keyword arguments via the params schema
    # (the async transform is awaited before the request is issued).
    payload = await async_maybe_transform(
        {
            "items": items,
            "model": model,
            "query": query,
            "max_num_results": max_num_results,
        },
        inference_rerank_params.InferenceRerankParams,
    )

    # The response is parsed as a DataWrapper and then unwrapped by the post-parser.
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
        post_parser=DataWrapper[InferenceRerankResponse]._unwrapper,
    )

    # The cast only satisfies the type checker: the wire type is the wrapper,
    # while callers receive the unwrapped InferenceRerankResponse.
    response_type = cast(Type[InferenceRerankResponse], DataWrapper[InferenceRerankResponse])

    return await self._post(
        "/v1/inference/rerank",
        body=payload,
        options=request_options,
        cast_to=response_type,
    )
1472
+
1354
1473
1355
1474
class InferenceResourceWithRawResponse :
1356
1475
def __init__ (self , inference : InferenceResource ) -> None :
@@ -1377,6 +1496,9 @@ def __init__(self, inference: InferenceResource) -> None:
1377
1496
inference .embeddings , # pyright: ignore[reportDeprecated],
1378
1497
)
1379
1498
)
1499
+ self .rerank = to_raw_response_wrapper (
1500
+ inference .rerank ,
1501
+ )
1380
1502
1381
1503
1382
1504
class AsyncInferenceResourceWithRawResponse :
@@ -1404,6 +1526,9 @@ def __init__(self, inference: AsyncInferenceResource) -> None:
1404
1526
inference .embeddings , # pyright: ignore[reportDeprecated],
1405
1527
)
1406
1528
)
1529
+ self .rerank = async_to_raw_response_wrapper (
1530
+ inference .rerank ,
1531
+ )
1407
1532
1408
1533
1409
1534
class InferenceResourceWithStreamingResponse :
@@ -1431,6 +1556,9 @@ def __init__(self, inference: InferenceResource) -> None:
1431
1556
inference .embeddings , # pyright: ignore[reportDeprecated],
1432
1557
)
1433
1558
)
1559
+ self .rerank = to_streamed_response_wrapper (
1560
+ inference .rerank ,
1561
+ )
1434
1562
1435
1563
1436
1564
class AsyncInferenceResourceWithStreamingResponse :
@@ -1458,3 +1586,6 @@ def __init__(self, inference: AsyncInferenceResource) -> None:
1458
1586
inference .embeddings , # pyright: ignore[reportDeprecated],
1459
1587
)
1460
1588
)
1589
+ self .rerank = async_to_streamed_response_wrapper (
1590
+ inference .rerank ,
1591
+ )
0 commit comments