@@ -95,6 +95,20 @@ export class Inference extends APIResource {
   ): Core.APIPromise<EmbeddingsResponse> {
     return this._client.post('/v1/inference/embeddings', { body, ...options });
   }
+
+  /**
+   * Rerank a list of documents based on their relevance to a query.
+   */
+  rerank(
+    body: InferenceRerankParams,
+    options?: Core.RequestOptions,
+  ): Core.APIPromise<InferenceRerankResponse> {
+    return (
+      this._client.post('/v1/inference/rerank', { body, ...options }) as Core.APIPromise<{
+        data: InferenceRerankResponse;
+      }>
+    )._thenUnwrap((obj) => obj.data);
+  }
 }

 /**
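// Illustrative usage sketch for the rerank() method added above. The package
// name 'llama-stack-client', the `LlamaStackClient` default export, and the
// constructor options are assumptions; adapt them to this SDK's actual entry
// point. The model identifier is a placeholder.
import LlamaStackClient from 'llama-stack-client';

const client = new LlamaStackClient({ baseURL: 'http://localhost:8321' });

const items = [
  'Paris is the capital of France.',
  'Berlin is the capital of Germany.',
  'The Eiffel Tower is in Paris.',
];

// rerank() resolves to the bare result array: the method casts the response to
// { data: InferenceRerankResponse } and unwraps it with _thenUnwrap.
const rerankResults = await client.inference.rerank({
  model: 'example-rerank-model', // placeholder model id
  query: 'What is the capital of France?',
  items,
  max_num_results: 2, // optional; omitting it returns all results
});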
@@ -197,6 +211,29 @@ export interface InferenceBatchChatCompletionResponse {
   batch: Array<Shared.ChatCompletionResponse>;
 }

+/**
+ * List of rerank result objects, sorted by relevance score (descending)
+ */
+export type InferenceRerankResponse = Array<InferenceRerankResponse.InferenceRerankResponseItem>;
+
+export namespace InferenceRerankResponse {
+  /**
+   * A single rerank result from a reranking response.
+   */
+  export interface InferenceRerankResponseItem {
+    /**
+     * The original index of the document in the input list
+     */
+    index: number;
+
+    /**
+     * The relevance score from the model output. Values are inverted when applicable
+     * so that higher scores indicate greater relevance.
+     */
+    relevance_score: number;
+  }
+}
+
 export interface InferenceBatchChatCompletionParams {
   /**
    * The messages to generate completions for.
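// Sketch of consuming InferenceRerankResponse: each item's `index` points back
// into the original `items` array and results arrive sorted by relevance_score
// in descending order, so a prefix slice is the top-k. `rerankResults` and
// `items` are the assumed variables from the usage sketch after the first hunk.
const ranked = rerankResults.map((result) => ({
  document: items[result.index], // recover the original input by index
  score: result.relevance_score, // higher score means more relevant
}));
const topTwo = ranked.slice(0, 2); // already sorted, so the first entries are the best matches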
@@ -575,13 +612,142 @@ export interface InferenceEmbeddingsParams {
   text_truncation?: 'none' | 'start' | 'end';
 }

+export interface InferenceRerankParams {
+  /**
+   * List of items to rerank. Each item can be a string, text content part, or image
+   * content part. Each input must not exceed the model's max input token length.
+   */
+  items: Array<
+    | string
+    | InferenceRerankParams.OpenAIChatCompletionContentPartTextParam
+    | InferenceRerankParams.OpenAIChatCompletionContentPartImageParam
+  >;
+
+  /**
+   * The identifier of the reranking model to use.
+   */
+  model: string;
+
+  /**
+   * The search query to rank items against. Can be a string, text content part, or
+   * image content part. The input must not exceed the model's max input token
+   * length.
+   */
+  query:
+    | string
+    | InferenceRerankParams.OpenAIChatCompletionContentPartTextParam
+    | InferenceRerankParams.OpenAIChatCompletionContentPartImageParam;
+
+  /**
+   * (Optional) Maximum number of results to return. Default: returns all.
+   */
+  max_num_results?: number;
+}
+
+export namespace InferenceRerankParams {
+  /**
+   * Text content part for OpenAI-compatible chat completion messages.
+   */
+  export interface OpenAIChatCompletionContentPartTextParam {
+    /**
+     * The text content of the message
+     */
+    text: string;
+
+    /**
+     * Must be "text" to identify this as text content
+     */
+    type: 'text';
+  }
+
+  /**
+   * Image content part for OpenAI-compatible chat completion messages.
+   */
+  export interface OpenAIChatCompletionContentPartImageParam {
+    /**
+     * Image URL specification and processing details
+     */
+    image_url: OpenAIChatCompletionContentPartImageParam.ImageURL;
+
+    /**
+     * Must be "image_url" to identify this as image content
+     */
+    type: 'image_url';
+  }
+
+  export namespace OpenAIChatCompletionContentPartImageParam {
+    /**
+     * Image URL specification and processing details
+     */
+    export interface ImageURL {
+      /**
+       * URL of the image to include in the message
+       */
+      url: string;
+
+      /**
+       * (Optional) Level of detail for image processing. Can be "low", "high", or "auto"
+       */
+      detail?: string;
+    }
+  }
+
+  /**
+   * Text content part for OpenAI-compatible chat completion messages.
+   */
+  export interface OpenAIChatCompletionContentPartTextParam {
+    /**
+     * The text content of the message
+     */
+    text: string;
+
+    /**
+     * Must be "text" to identify this as text content
+     */
+    type: 'text';
+  }
+
+  /**
+   * Image content part for OpenAI-compatible chat completion messages.
+   */
+  export interface OpenAIChatCompletionContentPartImageParam {
+    /**
+     * Image URL specification and processing details
+     */
+    image_url: OpenAIChatCompletionContentPartImageParam.ImageURL;
+
+    /**
+     * Must be "image_url" to identify this as image content
+     */
+    type: 'image_url';
+  }
+
+  export namespace OpenAIChatCompletionContentPartImageParam {
+    /**
+     * Image URL specification and processing details
+     */
+    export interface ImageURL {
+      /**
+       * URL of the image to include in the message
+       */
+      url: string;
+
+      /**
+       * (Optional) Level of detail for image processing. Can be "low", "high", or "auto"
+       */
+      detail?: string;
+    }
+  }
+}
+
 export declare namespace Inference {
   export {
     type ChatCompletionResponseStreamChunk as ChatCompletionResponseStreamChunk,
     type CompletionResponse as CompletionResponse,
     type EmbeddingsResponse as EmbeddingsResponse,
     type TokenLogProbs as TokenLogProbs,
     type InferenceBatchChatCompletionResponse as InferenceBatchChatCompletionResponse,
+    type InferenceRerankResponse as InferenceRerankResponse,
     type InferenceBatchChatCompletionParams as InferenceBatchChatCompletionParams,
     type InferenceBatchCompletionParams as InferenceBatchCompletionParams,
     type InferenceChatCompletionParams as InferenceChatCompletionParams,
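// Sketch of InferenceRerankParams built from content parts rather than plain
// strings: the query and individual items may each be a string, a text content
// part, or an image_url content part. The type import path, model id, and
// image URL are assumptions/placeholders.
import type { InferenceRerankParams } from 'llama-stack-client/resources/inference'; // assumed path

const params: InferenceRerankParams = {
  model: 'example-rerank-model', // placeholder model id
  query: { type: 'text', text: 'red running shoes' },
  items: [
    'Blue waterproof hiking boots.', // plain string item
    { type: 'text', text: 'Red mesh running shoes, size 42.' }, // text content part
    {
      type: 'image_url',
      image_url: { url: 'https://example.com/shoe.jpg', detail: 'low' }, // placeholder image URL
    },
  ],
  max_num_results: 5,
};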
@@ -591,5 +757,6 @@ export declare namespace Inference {
     type InferenceCompletionParamsNonStreaming as InferenceCompletionParamsNonStreaming,
     type InferenceCompletionParamsStreaming as InferenceCompletionParamsStreaming,
     type InferenceEmbeddingsParams as InferenceEmbeddingsParams,
+    type InferenceRerankParams as InferenceRerankParams,
   };
 }