@@ -95,6 +95,20 @@ export class Inference extends APIResource {
   ): Core.APIPromise<EmbeddingsResponse> {
     return this._client.post('/v1/inference/embeddings', { body, ...options });
   }
+
+  /**
+   * Rerank a list of documents based on their relevance to a query.
+   */
+  rerank(
+    body: InferenceRerankParams,
+    options?: Core.RequestOptions,
+  ): Core.APIPromise<InferenceRerankResponse> {
+    return (
+      this._client.post('/v1/inference/rerank', { body, ...options }) as Core.APIPromise<{
+        data: InferenceRerankResponse;
+      }>
+    )._thenUnwrap((obj) => obj.data);
+  }
 }

 /**
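// Illustrative usage sketch for the rerank() method added above. The package
// name 'llama-stack-client', the `LlamaStackClient` default export, and the
// constructor options are assumptions; adapt them to this SDK's actual entry
// point. The model identifier is a placeholder.
import LlamaStackClient from 'llama-stack-client';

const client = new LlamaStackClient({ baseURL: 'http://localhost:8321' });

const items = [
  'Paris is the capital of France.',
  'Berlin is the capital of Germany.',
  'The Eiffel Tower is in Paris.',
];

// rerank() resolves to the bare result array: the method casts the response to
// { data: InferenceRerankResponse } and unwraps it with _thenUnwrap.
const rerankResults = await client.inference.rerank({
  model: 'example-rerank-model', // placeholder model id
  query: 'What is the capital of France?',
  items,
  max_num_results: 2, // optional; omitting it returns all results
});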
@@ -197,6 +211,29 @@ export interface InferenceBatchChatCompletionResponse {
   batch: Array<Shared.ChatCompletionResponse>;
 }

+/**
+ * List of rerank result objects, sorted by relevance score (descending)
+ */
+export type InferenceRerankResponse = Array<InferenceRerankResponse.InferenceRerankResponseItem>;
+
+export namespace InferenceRerankResponse {
+  /**
+   * A single rerank result from a reranking response.
+   */
+  export interface InferenceRerankResponseItem {
+    /**
+     * The original index of the document in the input list
+     */
+    index: number;
+
+    /**
+     * The relevance score from the model output. Values are inverted when applicable
+     * so that higher scores indicate greater relevance.
+     */
+    relevance_score: number;
+  }
+}
+
 export interface InferenceBatchChatCompletionParams {
   /**
    * The messages to generate completions for.
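// Sketch of consuming InferenceRerankResponse: each item's `index` points back
// into the original `items` array and results arrive sorted by relevance_score
// in descending order, so a prefix slice is the top-k. `rerankResults` and
// `items` are the assumed variables from the usage sketch after the first hunk.
const ranked = rerankResults.map((result) => ({
  document: items[result.index], // recover the original input by index
  score: result.relevance_score, // higher score means more relevant
}));
const topTwo = ranked.slice(0, 2); // already sorted, so the first entries are the best matches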
@@ -575,13 +612,142 @@ export interface InferenceEmbeddingsParams {
   text_truncation?: 'none' | 'start' | 'end';
 }

+export interface InferenceRerankParams {
+  /**
+   * List of items to rerank. Each item can be a string, text content part, or image
+   * content part. Each input must not exceed the model's max input token length.
+   */
+  items: Array<
+    | string
+    | InferenceRerankParams.OpenAIChatCompletionContentPartTextParam
+    | InferenceRerankParams.OpenAIChatCompletionContentPartImageParam
+  >;
+
+  /**
+   * The identifier of the reranking model to use.
+   */
+  model: string;
+
+  /**
+   * The search query to rank items against. Can be a string, text content part, or
+   * image content part. The input must not exceed the model's max input token
+   * length.
+   */
+  query:
+    | string
+    | InferenceRerankParams.OpenAIChatCompletionContentPartTextParam
+    | InferenceRerankParams.OpenAIChatCompletionContentPartImageParam;
+
+  /**
+   * (Optional) Maximum number of results to return. Default: returns all.
+   */
+  max_num_results?: number;
+}
+
+export namespace InferenceRerankParams {
+  /**
+   * Text content part for OpenAI-compatible chat completion messages.
+   */
+  export interface OpenAIChatCompletionContentPartTextParam {
+    /**
+     * The text content of the message
+     */
+    text: string;
+
+    /**
+     * Must be "text" to identify this as text content
+     */
+    type: 'text';
+  }
+
+  /**
+   * Image content part for OpenAI-compatible chat completion messages.
+   */
+  export interface OpenAIChatCompletionContentPartImageParam {
+    /**
+     * Image URL specification and processing details
+     */
+    image_url: OpenAIChatCompletionContentPartImageParam.ImageURL;
+
+    /**
+     * Must be "image_url" to identify this as image content
+     */
+    type: 'image_url';
+  }
+
+  export namespace OpenAIChatCompletionContentPartImageParam {
+    /**
+     * Image URL specification and processing details
+     */
+    export interface ImageURL {
+      /**
+       * URL of the image to include in the message
+       */
+      url: string;
+
+      /**
+       * (Optional) Level of detail for image processing. Can be "low", "high", or "auto"
+       */
+      detail?: string;
+    }
+  }
+
+  /**
+   * Text content part for OpenAI-compatible chat completion messages.
+   */
+  export interface OpenAIChatCompletionContentPartTextParam {
+    /**
+     * The text content of the message
+     */
+    text: string;
+
+    /**
+     * Must be "text" to identify this as text content
+     */
+    type: 'text';
+  }
+
+  /**
+   * Image content part for OpenAI-compatible chat completion messages.
+   */
+  export interface OpenAIChatCompletionContentPartImageParam {
+    /**
+     * Image URL specification and processing details
+     */
+    image_url: OpenAIChatCompletionContentPartImageParam.ImageURL;
+
+    /**
+     * Must be "image_url" to identify this as image content
+     */
+    type: 'image_url';
+  }
+
+  export namespace OpenAIChatCompletionContentPartImageParam {
+    /**
+     * Image URL specification and processing details
+     */
+    export interface ImageURL {
+      /**
+       * URL of the image to include in the message
+       */
+      url: string;
+
+      /**
+       * (Optional) Level of detail for image processing. Can be "low", "high", or "auto"
+       */
+      detail?: string;
+    }
+  }
+}
+
 export declare namespace Inference {
   export {
     type ChatCompletionResponseStreamChunk as ChatCompletionResponseStreamChunk,
     type CompletionResponse as CompletionResponse,
     type EmbeddingsResponse as EmbeddingsResponse,
     type TokenLogProbs as TokenLogProbs,
     type InferenceBatchChatCompletionResponse as InferenceBatchChatCompletionResponse,
+    type InferenceRerankResponse as InferenceRerankResponse,
     type InferenceBatchChatCompletionParams as InferenceBatchChatCompletionParams,
     type InferenceBatchCompletionParams as InferenceBatchCompletionParams,
     type InferenceChatCompletionParams as InferenceChatCompletionParams,
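// Sketch of InferenceRerankParams built from content parts rather than plain
// strings: the query and individual items may each be a string, a text content
// part, or an image_url content part. The type import path, model id, and
// image URL are assumptions/placeholders.
import type { InferenceRerankParams } from 'llama-stack-client/resources/inference'; // assumed path

const params: InferenceRerankParams = {
  model: 'example-rerank-model', // placeholder model id
  query: { type: 'text', text: 'red running shoes' },
  items: [
    'Blue waterproof hiking boots.', // plain string item
    { type: 'text', text: 'Red mesh running shoes, size 42.' }, // text content part
    {
      type: 'image_url',
      image_url: { url: 'https://example.com/shoe.jpg', detail: 'low' }, // placeholder image URL
    },
  ],
  max_num_results: 5,
};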
@@ -591,5 +757,6 @@ export declare namespace Inference {
     type InferenceCompletionParamsNonStreaming as InferenceCompletionParamsNonStreaming,
     type InferenceCompletionParamsStreaming as InferenceCompletionParamsStreaming,
     type InferenceEmbeddingsParams as InferenceEmbeddingsParams,
+    type InferenceRerankParams as InferenceRerankParams,
   };
 }