diff --git a/openapi.yaml b/openapi.yaml
index 7ce21e7..142a201 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -15,922 +15,846 @@ servers:
 security:
   - bearerAuth: []
 paths:
-  /voices:
+  /deployments:
     get:
-      tags: ['Voices']
-      summary: Fetch available voices for each model
-      description: Fetch available voices for each model
-      operationId: fetchVoices
+      description: Get a list of all deployments in your project
       responses:
-        '200':
-          description: Success
+        "200":
+          description: List of deployments
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ListVoicesResponse'
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.audio.voices.list()
-
-            print(response.data)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.audio.voices.list()
-
-            print(response.data)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.audio.voices.list()
-
-            console.log(response.data);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.audio.voices.list()
-
-            console.log(response.data);
-  /videos/{id}:
-    get:
-      tags: ['Video']
-      summary: Fetch video metadata
-      description: Fetch video metadata
-      servers:
-        - url: https://api.together.xyz/v2
-      operationId: retrieveVideo
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.videos.retrieve(video_id)
-
-            print(response.id)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.videos.retrieve(video_id)
-
-            print(response.id)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.videos.retrieve(videoId);
-
-            console.log(response.status);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.videos.retrieve(videoId);
-
-            console.log(response.status);
-      parameters:
-        - in: path
-          name: id
-          schema:
-            type: string
-          required: true
-          description: Identifier of video from create response.
-      responses:
-        '200':
-          description: Success
+                $ref: "#/components/schemas/DeploymentListResponse"
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/VideoJob'
-        '400':
-          description: Invalid request parameters.
-        '404':
-          description: Video ID not found.
-  /videos:
+                type: object
+      summary: Get the list of deployments
+      tags:
+        - Deployments
     post:
-      tags: ['Video']
-      summary: Create video
-      description: Create a video
-      operationId: createVideo
-      servers:
-        - url: https://api.together.xyz/v2
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
- 
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.videos.create(
-                model="together/video-model",
-                prompt="A cartoon of an astronaut riding a horse on the moon"
-            )
-
-            print(response.id)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.videos.create(
-                model="together/video-model",
-                prompt="A cartoon of an astronaut riding a horse on the moon"
-            )
-
-            print(response.id)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-            
-            const response = await client.videos.create({
-              model: "together/video-model",
-              prompt: "A cartoon of an astronaut riding a horse on the moon",
-            });
-
-            console.log(response.id);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.videos.create({
-              model: "together/video-model",
-              prompt: "A cartoon of an astronaut riding a horse on the moon",
-            });
-
-            console.log(response.id);
+      description: Create a new deployment with specified configuration
       requestBody:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/CreateVideoBody'
+              $ref: "#/components/schemas/CreateDeploymentRequest"
+        description: Deployment configuration
+        required: true
       responses:
-        '200':
-          description: Success
+        "200":
+          description: Deployment created successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/VideoJob'
-  /chat/completions:
-    post:
-      tags: ['Chat']
-      summary: Create chat completion
-      description: Query a chat model.
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.chat.completions.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                messages=[
-                    {"role": "system", "content": "You are a helpful assistant."},
-                    {"role": "user", "content": "What are some fun things to do in New York?"},
-                ]
-            )
-
-            print(response.choices[0].message.content)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.chat.completions.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                messages=[
-                    {"role": "system", "content": "You are a helpful assistant."},
-                    {"role": "user", "content": "What are some fun things to do in New York?"},
-                ]
-            )
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.chat.completions.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-              messages: [
-                { role: "system", content: "You are a helpful assistant." },
-                { role: "user", "content": "What are some fun things to do in New York?" },
-              ],
-            });
-
-            console.log(response.choices[0].message?.content);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.chat.completions.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-              messages: [
-                { role: "system", content: "You are a helpful assistant." },
-                { role: "user", "content": "What are some fun things to do in New York?" },
-              ],
-            });
-
-            console.log(response.choices[0].message?.content);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl -X POST "https://api.together.xyz/v1/chat/completions" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                   "messages": [
-                     {"role": "system", "content": "You are a helpful assistant."},
-                     {"role": "user", "content": "What are some fun things to do in New York?"}
-                   ]
-                 }'
-      operationId: chat-completions
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/ChatCompletionRequest'
-      responses:
-        '200':
-          description: '200'
+                $ref: "#/components/schemas/DeploymentResponseItem"
+        "400":
+          description: Invalid request
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ChatCompletionResponse'
-            text/event-stream:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
               schema:
-                $ref: '#/components/schemas/ChatCompletionStream'
-        '400':
-          description: 'BadRequest'
+                type: object
+      summary: Create a new deployment
+      tags:
+        - Deployments
+  "/deployments/{id}":
+    delete:
+      description: Delete an existing deployment
+      parameters:
+        - description: Deployment ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: Deployment deleted successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: 'Unauthorized'
+                type: object
+        "404":
+          description: Deployment not found
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '404':
-          description: 'NotFound'
+                type: object
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
+                type: object
+      summary: Delete a deployment
+      tags:
+        - Deployments
+    get:
+      description: Retrieve details of a specific deployment by its ID or name
+      parameters:
+        - description: Deployment ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: Deployment details
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '503':
-          description: 'Overloaded'
+                $ref: "#/components/schemas/DeploymentResponseItem"
+        "404":
+          description: Deployment not found
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '504':
-          description: 'Timeout'
+                type: object
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-      deprecated: false
-  /completions:
-    post:
-      tags: ['Completion']
-      summary: Create completion
-      description: Query a language, code, or image model.
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.completions.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                prompt="The largest city in France is",
-                max_tokens=1
-            )
-
-            print(response.choices[0].text)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.completions.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                prompt="The largest city in France is",
-                max_tokens=1
-            )
-
-            print(response.choices[0].text)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.completions.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-              prompt: "The largest city in France is",
-              max_tokens: 1,
-            });
-
-            console.log(response.choices[0].text);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.completions.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-              prompt: "The largest city in France is",
-              max_tokens: 1
-            });
-
-            console.log(response.choices[0].text);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl -X POST "https://api.together.xyz/v1/completions" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                   "prompt": "The largest city in France is",
-                   "max_tokens": 1
-                 }'
-      operationId: completions
+                type: object
+      summary: Get a deployment by ID or name
+      tags:
+        - Deployments
+    patch:
+      description: Update an existing deployment configuration
+      parameters:
+        - description: Deployment ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
       requestBody:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/CompletionRequest'
+              $ref: "#/components/schemas/UpdateDeploymentRequest"
+        description: Updated deployment configuration
+        required: true
       responses:
-        '200':
-          description: '200'
+        "200":
+          description: Deployment updated successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/CompletionResponse'
-            text/event-stream:
+                $ref: "#/components/schemas/DeploymentResponseItem"
+        "400":
+          description: Invalid request
+          content:
+            application/json:
               schema:
-                $ref: '#/components/schemas/CompletionStream'
-        '400':
-          description: 'BadRequest'
+                type: object
+        "404":
+          description: Deployment not found
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: 'Unauthorized'
+                type: object
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '404':
-          description: 'NotFound'
+                type: object
+      summary: Update a deployment
+      tags:
+        - Deployments
+  "/deployments/{id}/logs":
+    get:
+      description: Retrieve logs from a deployment, optionally filtered by replica ID.
+        Use follow=true to stream logs in real-time.
+      parameters:
+        - description: Deployment ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+        - description: Replica ID to filter logs
+          in: query
+          name: replica_id
+          schema:
+            type: string
+        - description: Stream logs in real-time (ndjson format)
+          in: query
+          name: follow
+          schema:
+            type: boolean
+      responses:
+        "200":
+          description: Deployment logs
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '503':
-          description: 'Overloaded'
+                $ref: "#/components/schemas/DeploymentLogs"
+        "404":
+          description: Deployment not found
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '504':
-          description: 'Timeout'
+                type: object
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-      deprecated: false
-  /embeddings:
-    post:
-      tags: ['Embeddings']
-      summary: Create embedding
-      description: Query an embedding model for a given string of text.
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.embeddings.create(
-                model="BAAI/bge-large-en-v1.5",
-                input="New York City",
-            )
-
-            print(response.data[0].embedding)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.embeddings.create(
-                model="BAAI/bge-large-en-v1.5",
-                input="New York City",
-            )
-
-            print(response.data[0].embedding)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.embeddings.create({
-              model: "BAAI/bge-large-en-v1.5",
-              input: "New York City",
-            });
-
-            console.log(response.data[0].embedding);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.embeddings.create({
-              model: "BAAI/bge-large-en-v1.5",
-              input: "New York City",
-            });
-
-            console.log(response.data[0].embedding);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl -X POST "https://api.together.xyz/v1/embeddings" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "model": "BAAI/bge-large-en-v1.5",
-                   "input": "New York City"
-                 }'
-      operationId: embeddings
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/EmbeddingsRequest'
+                type: object
+      summary: Get logs for a deployment
+      tags:
+        - Deployments
+  /image-repositories:
+    get:
+      description: Retrieve all container image repositories available in your project
       responses:
-        '200':
-          description: '200'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EmbeddingsResponse'
-        '400':
-          description: 'BadRequest'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: 'Unauthorized'
+        "200":
+          description: List of repositories
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '404':
-          description: 'NotFound'
+                $ref: "#/components/schemas/RepositoryListResponse"
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
+                type: object
+      summary: Get the list of image repositories in your project
+      tags:
+        - Images
+  "/image-repositories/{id}/images":
+    get:
+      description: Retrieve all container images (tags) available in a specific repository
+      parameters:
+        - description: Repository ID
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: List of images
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '503':
-          description: 'Overloaded'
+                $ref: "#/components/schemas/ImageListResponse"
+        "404":
+          description: Repository not found
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '504':
-          description: 'Timeout'
+                type: object
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-      deprecated: false
-  /models:
+                type: object
+      summary: Get the list of images available under a repository
+      tags:
+        - Images
+  /secrets:
     get:
-      tags: ['Models']
-      summary: List all models
-      description: Lists all of Together's open-source models
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            models = client.models.list()
-
-            for model in models:
-                print(model.id)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            models = client.models.list()
-
-            for model in models:
-                print(model.id)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const models = await client.models.list();
-
-            for (const model of models) {
-              console.log(model.id);
-            }
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const models = await client.models.list();
-
-            for (const model of models) {
-              console.log(model.id);
-            }
-        - lang: Shell
-          label: cURL
-          source: |
-            curl "https://api.together.xyz/v1/models" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      operationId: models
+      description: Retrieve all secrets in your project
       responses:
-        '200':
-          description: '200'
+        "200":
+          description: List of secrets
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ModelInfoList'
-        '400':
-          description: 'BadRequest'
+                $ref: "#/components/schemas/ListSecretsResponse"
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: 'Unauthorized'
+                type: object
+      summary: Get the list of project secrets
+      tags:
+        - Secrets
+    post:
+      description: Create a new secret to store sensitive configuration values
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/CreateSecretRequest"
+        description: Secret configuration
+        required: true
+      responses:
+        "200":
+          description: Secret created successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '404':
-          description: 'NotFound'
+                $ref: "#/components/schemas/SecretResponseItem"
+        "400":
+          description: Invalid request
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
+                type: object
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '504':
-          description: 'Timeout'
+                type: object
+      summary: Create a new secret
+      tags:
+        - Secrets
+  "/secrets/{id}":
+    delete:
+      description: Delete an existing secret
+      parameters:
+        - description: Secret ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: Secret deleted successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-      deprecated: false
-    post:
-      tags: ['Models']
-      summary: Upload a custom model or adapter
-      description: Upload a custom model or adapter from Hugging Face or S3
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.models.upload(
-                "My-Fine-Tuned-Model",
-                "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
-            )
-
-            print(response.job_id)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.models.upload(
-                model_name="My-Fine-Tuned-Model",
-                model_source="https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
-            )
-
-            print(response.data.job_id)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.models.upload({
-              model_name: "My-Fine-Tuned-Model",
-              model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
-            })
-
-            console.log(response);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.models.upload({
-              model_name: "My-Fine-Tuned-Model",
-              model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
-            })
-
-            console.log(response);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl -X POST "https://api.together.xyz/v1/models" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                    "model_name": "My-Fine-Tuned-Model",
-                    "model_source": "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz"
-                  }'
-      operationId: uploadModel
+                type: object
+        "404":
+          description: Secret not found
+          content:
+            application/json:
+              schema:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Delete a secret
+      tags:
+        - Secrets
+    get:
+      description: Retrieve details of a specific secret by its ID or name
+      parameters:
+        - description: Secret ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: Secret details
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/SecretResponseItem"
+        "404":
+          description: Secret not found
+          content:
+            application/json:
+              schema:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Get a secret by ID or name
+      tags:
+        - Secrets
+    patch:
+      description: Update an existing secret's value or metadata
+      parameters:
+        - description: Secret ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
       requestBody:
-        required: true
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/ModelUploadRequest'
+              $ref: "#/components/schemas/UpdateSecretRequest"
+        description: Updated secret configuration
+        required: true
       responses:
-        '200':
-          description: Model / adapter upload job created successfully
+        "200":
+          description: Secret updated successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ModelUploadSuccessResponse'
-
-  /jobs/{jobId}:
+                $ref: "#/components/schemas/SecretResponseItem"
+        "400":
+          description: Invalid request
+          content:
+            application/json:
+              schema:
+                type: object
+        "404":
+          description: Secret not found
+          content:
+            application/json:
+              schema:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Update a secret
+      tags:
+        - Secrets
+  /storage/{filename}:
     get:
-      tags: ['Jobs']
-      summary: Get job status
-      description: Get the status of a specific job
-      operationId: getJob
+      description: Download a file by redirecting to a signed URL
       parameters:
-        - name: jobId
+        - description: Filename
           in: path
+          name: filename
           required: true
           schema:
             type: string
-          description: The ID of the job to retrieve
-          example: job-a15dad11-8d8e-4007-97c5-a211304de284
       responses:
-        '200':
-          description: Job status retrieved successfully
+        "307":
+          description: Redirect to signed download URL
-          content:
-            application/json:
+          headers:
+            Location:
               schema:
-                $ref: '#/components/schemas/JobInfoSuccessResponse'
-
-  /jobs:
+                type: string
+        "400":
+          description: Invalid request
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+        "404":
+          description: File not found
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+        "500":
+          description: Internal error
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+      summary: Download a file
+      tags:
+        - files
+  /storage/{filename}/url:
     get:
-      tags: ['Jobs']
-      summary: List all jobs
-      description: List all jobs and their statuses
-      operationId: listJobs
+      description: Get a presigned download URL for a file
+      parameters:
+        - description: Filename
+          in: path
+          name: filename
+          required: true
+          schema:
+            type: string
       responses:
-        '200':
-          description: Jobs retrieved successfully
+        "200":
+          description: Signed URL
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/JobsInfoSuccessResponse'
-
-  /images/generations:
+                $ref: "#/components/schemas/api_v1.SignedURLResponse"
+        "400":
+          description: Invalid request
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+        "404":
+          description: File not found
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+        "500":
+          description: Internal error
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+      summary: Get a signed URL for a file
+      tags:
+        - files
+  /storage/multipart/abort:
     post:
-      tags: ['Images']
-      summary: Create image
-      description: Use an image model to generate an image for a given prompt.
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.images.generate(
-                model="black-forest-labs/FLUX.1-schnell",
-                steps=4,
-                prompt="A cartoon of an astronaut riding a horse on the moon",
-            )
-
-            print(response.data[0].url)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
+      description: Abort a multi-part upload and discard all uploaded parts
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/files.AbortMultiPartRequest"
+        description: Abort multi-part upload request
+        required: true
+      responses:
+        "200":
+          description: Multi-part upload aborted successfully
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+        "400":
+          description: Invalid request
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+        "500":
+          description: Internal error
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+      summary: Abort multi-part upload
+      tags:
+        - files
+  /storage/multipart/complete:
+    post:
+      description: Complete a multi-part upload by providing all part ETags
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/files.CompleteMultiPartRequest"
+        description: Complete multi-part upload request
+        required: true
+      responses:
+        "200":
+          description: Multi-part upload completed
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/files.CompleteUploadResponse"
+        "400":
+          description: Invalid request
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+        "500":
+          description: Internal error
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+      summary: Complete multi-part upload
+      tags:
+        - files
+  /storage/multipart/init:
+    post:
+      description: Initiate a multi-part upload and get presigned URLs for each part
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/files.InitiateMultiPartRequest"
+        description: Multi-part upload init request
+        required: true
+      responses:
+        "200":
+          description: Multi-part upload info
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/files.MultiPartInitResponse"
+        "400":
+          description: Invalid request
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+        "500":
+          description: Internal error
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+      summary: Initiate multi-part upload
+      tags:
+        - files
+  /storage/upload-request:
+    post:
+      description: Request a presigned upload URL for a file
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/files.FileRequest"
+        description: Upload request
+        required: true
+      responses:
+        "200":
+          description: Upload URL and path
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/files.UploadResponse"
+        "400":
+          description: Invalid request
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+        "500":
+          description: Internal error
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+      summary: Request an upload URL for a file
+      tags:
+        - files
+  /storage/volumes:
+    get:
+      description: Retrieve all volumes in your project
+      responses:
+        "200":
+          description: List of volumes
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ListVolumesResponse"
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Get the list of project volumes
+      tags:
+        - Volumes
+    post:
+      description: Create a new volume to preload files in deployments
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/CreateVolumeRequest"
+        description: Volume configuration
+        required: true
+      responses:
+        "200":
+          description: Volume created successfully
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/VolumeResponseItem"
+        "400":
+          description: Invalid request
+          content:
+            application/json:
+              schema:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Create a new volume
+      tags:
+        - Volumes
+  /storage/volumes/{id}:
+    delete:
+      description: Delete an existing volume
+      parameters:
+        - description: Volume ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: Volume deleted successfully
+          content:
+            application/json:
+              schema:
+                type: object
+        "404":
+          description: Volume not found
+          content:
+            application/json:
+              schema:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Delete a volume
+      tags:
+        - Volumes
+    get:
+      description: Retrieve details of a specific volume by its ID or name
+      parameters:
+        - description: Volume ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: Volume details
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/VolumeResponseItem"
+        "404":
+          description: Volume not found
+          content:
+            application/json:
+              schema:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Get a volume by ID or name
+      tags:
+        - Volumes
+    patch:
+      description: Update an existing volume's configuration or contents
+      parameters:
+        - description: Volume ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/UpdateVolumeRequest"
+        description: Updated volume configuration
+        required: true
+      responses:
+        "200":
+          description: Volume updated successfully
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/VolumeResponseItem"
+        "400":
+          description: Invalid request
+          content:
+            application/json:
+              schema:
+                type: object
+        "404":
+          description: Volume not found
+          content:
+            application/json:
+              schema:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Update a volume
+      tags:
+        - Volumes
+  /voices:
+    get:
+      tags: ['Voices']
+      summary: Fetch available voices for each model
+      description: Fetch available voices for each model
+      operationId: fetchVoices
+      responses:
+        '200':
+          description: Success
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListVoicesResponse'
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
 
             client = Together(
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.images.generate(
-                model="black-forest-labs/FLUX.1-schnell",
-                steps=4,
-                prompt="A cartoon of an astronaut riding a horse on the moon",
+            response = client.audio.voices.list()
+
+            print(response.data)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            print(response.data[0].url)
+            response = client.audio.voices.list()
+
+            print(response.data)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -940,12 +864,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.images.generate({
-              model: "black-forest-labs/FLUX.1-schnell",
-              prompt: "A cartoon of an astronaut riding a horse on the moon",
-            });
+            const response = await client.audio.voices.list()
 
-            console.log(response.data[0].url);
+            console.log(response.data);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -955,126 +876,17 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.images.generate({
-              model: "black-forest-labs/FLUX.1-schnell",
-              prompt: "A cartoon of an astronaut riding a horse on the moon",
-            });
+            const response = await client.audio.voices.list()
 
-            console.log(response.data[0].url);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl -X POST "https://api.together.xyz/v1/images/generations" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "model": "black-forest-labs/FLUX.1-schnell",
-                   "prompt": "A cartoon of an astronaut riding a horse on the moon"
-                 }'
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              type: object
-              required:
-                - prompt
-                - model
-              properties:
-                prompt:
-                  type: string
-                  description: A description of the desired images. Maximum length varies by model.
-                  example: cat floating in space, cinematic
-                model:
-                  type: string
-                  description: >
-                    The model to use for image generation.<br>
-                    <br>
-                    [See all of Together AI's image models](https://docs.together.ai/docs/serverless-models#image-models)
-                  example: black-forest-labs/FLUX.1-schnell
-                  anyOf:
-                    - type: string
-                      enum:
-                        - black-forest-labs/FLUX.1-schnell-Free
-                        - black-forest-labs/FLUX.1-schnell
-                        - black-forest-labs/FLUX.1.1-pro
-                    - type: string
-                steps:
-                  type: integer
-                  default: 20
-                  description: Number of generation steps.
-                image_url:
-                  type: string
-                  description: URL of an image to use for image models that support it.
-                seed:
-                  type: integer
-                  description: Seed used for generation. Can be used to reproduce image generations.
-                n:
-                  type: integer
-                  default: 1
-                  description: Number of image results to generate.
-                height:
-                  type: integer
-                  default: 1024
-                  description: Height of the image to generate in number of pixels.
-                width:
-                  type: integer
-                  default: 1024
-                  description: Width of the image to generate in number of pixels.
-                negative_prompt:
-                  type: string
-                  description: The prompt or prompts not to guide the image generation.
-                response_format:
-                  type: string
-                  description: Format of the image response. Can be either a base64 string or a URL.
-                  enum:
-                    - base64
-                    - url
-                guidance_scale:
-                  type: number
-                  description: Adjusts the alignment of the generated image with the input prompt. Higher values (e.g., 8-10) make the output more faithful to the prompt, while lower values (e.g., 1-5) encourage more creative freedom.
-                  default: 3.5
-                output_format:
-                  type: string
-                  description: The format of the image response. Can be either be `jpeg` or `png`. Defaults to `jpeg`.
-                  default: jpeg
-                  enum:
-                    - jpeg
-                    - png
-                image_loras:
-                  description: An array of objects that define LoRAs (Low-Rank Adaptations) to influence the generated image.
-                  type: array
-                  items:
-                    type: object
-                    required: [path, scale]
-                    properties:
-                      path:
-                        type: string
-                        description: The URL of the LoRA to apply (e.g. https://huggingface.co/strangerzonehf/Flux-Midjourney-Mix2-LoRA).
-                      scale:
-                        type: number
-                        description: The strength of the LoRA's influence. Most LoRA's recommend a value of 1.
-                reference_images:
-                  description: An array of image URLs that guide the overall appearance and style of the generated image. These reference images influence the visual characteristics consistently across the generation.
-                  type: array
-                  items:
-                    type: string
-                    description: URL of a reference image to guide the image generation.
-                disable_safety_checker:
-                  type: boolean
-                  description: If true, disables the safety checker for image generation.
-      responses:
-        '200':
-          description: Image generated successfully
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ImageResponse'
-  /files:
+            console.log(response.data);
+  /videos/{id}:
     get:
-      tags: ['Files']
-      summary: List all files
-      description: List the metadata for all uploaded data files.
+      tags: ['Video']
+      summary: Fetch video metadata
+      description: Fetch video metadata
+      servers:
+        - url: https://api.together.xyz/v2
+      operationId: retrieveVideo
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1087,10 +899,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.files.list()
+            response = client.videos.retrieve(video_id)
 
-            for file in response.data:
-                print(file.id)
+            print(response.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1101,10 +912,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.files.list()
+            response = client.videos.retrieve(video_id)
 
-            for file in response.data:
-                print(file.id)
+            print(response.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1114,11 +924,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.files.list();
+            const response = await client.videos.retrieve(videoId);
 
-            for (const file of response.data) {
-              console.log(file.id);
-            }
+            console.log(response.status);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -1128,29 +936,35 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.files.list();
+            const response = await client.videos.retrieve(videoId);
 
-            for (const file of response.data) {
-              console.log(file.id);
-            }
-        - lang: Shell
-          label: cURL
-          source: |
-            curl "https://api.together.xyz/v1/files" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
+            console.log(response.status);
+      parameters:
+        - in: path
+          name: id
+          schema:
+            type: string
+          required: true
+          description: Identifier of video from create response.
       responses:
         '200':
-          description: List of files
+          description: Success
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FileList'
-  /files/{id}:
-    get:
-      tags: ['Files']
-      summary: List file
-      description: List the metadata for a single uploaded data file.
+                $ref: '#/components/schemas/VideoJob'
+        '400':
+          description: Invalid request parameters.
+        '404':
+          description: Video ID not found.
+  /videos:
+    post:
+      tags: ['Video']
+      summary: Create video
+      description: Create a video
+      operationId: createVideo
+      servers:
+        - url: https://api.together.xyz/v2
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1158,14 +972,17 @@ paths:
             # Docs for v2 can be found by changing the above selector ^
             from together import Together
             import os
-
+
             client = Together(
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            file = client.files.retrieve(id="file-id")
+            response = client.videos.create(
+                model="together/video-model",
+                prompt="A cartoon of an astronaut riding a horse on the moon"
+            )
 
-            print(file)
+            print(response.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1176,9 +993,12 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            file = client.files.retrieve(id="file-id")
+            response = client.videos.create(
+                model="together/video-model",
+                prompt="A cartoon of an astronaut riding a horse on the moon"
+            )
 
-            print(file)
+            print(response.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1187,10 +1007,13 @@ paths:
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
+
+            const response = await client.videos.create({
+              model: "together/video-model",
+              prompt: "A cartoon of an astronaut riding a horse on the moon",
+            });
 
-            const file = await client.files.retrieve("file-id");
-
-            console.log(file);
+            console.log(response.id);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -1200,32 +1023,29 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const file = await client.files.retrieve("file-id");
+            const response = await client.videos.create({
+              model: "together/video-model",
+              prompt: "A cartoon of an astronaut riding a horse on the moon",
+            });
 
-            console.log(file);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl "https://api.together.xyz/v1/files/ID" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      parameters:
-        - name: id
-          in: path
-          required: true
-          schema:
-            type: string
+            console.log(response.id);
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateVideoBody'
       responses:
         '200':
-          description: File retrieved successfully
+          description: Success
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FileResponse'
-    delete:
-      tags: ['Files']
-      summary: Delete a file
-      description: Delete a previously uploaded data file.
+                $ref: '#/components/schemas/VideoJob'
+  /chat/completions:
+    post:
+      tags: ['Chat']
+      summary: Create chat completion
+      description: Query a chat model.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1238,11 +1058,17 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.files.delete(id="file-id")
-
-            print(response)
-        - lang: Python
-          label: Together AI SDK (v2)
+            response = client.chat.completions.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What are some fun things to do in New York?"},
+                ]
+            )
+
+            print(response.choices[0].message.content)
+        - lang: Python
+          label: Together AI SDK (v2)
           source: |
             from together import Together
             import os
@@ -1251,9 +1077,13 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.files.delete(id="file-id")
-
-            print(response)
+            response = client.chat.completions.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What are some fun things to do in New York?"},
+                ]
+            )
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1263,9 +1093,15 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.files.delete("file-id");
+            const response = await client.chat.completions.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+              messages: [
+                { role: "system", content: "You are a helpful assistant." },
+                { role: "user", content: "What are some fun things to do in New York?" },
+              ],
+            });
 
-            console.log(response);
+            console.log(response.choices[0].message?.content);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -1275,32 +1111,86 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.files.delete("file-id");
+            const response = await client.chat.completions.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+              messages: [
+                { role: "system", content: "You are a helpful assistant." },
+                { role: "user", content: "What are some fun things to do in New York?" },
+              ],
+            });
 
-            console.log(response);
+            console.log(response.choices[0].message?.content);
         - lang: Shell
           label: cURL
           source: |
-            curl -X "DELETE" "https://api.together.xyz/v1/files/file-id" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY"
-      parameters:
-        - name: id
-          in: path
-          required: true
-          schema:
-            type: string
+            curl -X POST "https://api.together.xyz/v1/chat/completions" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                   "messages": [
+                     {"role": "system", "content": "You are a helpful assistant."},
+                     {"role": "user", "content": "What are some fun things to do in New York?"}
+                   ]
+                 }'
+      operationId: chat-completions
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ChatCompletionRequest'
       responses:
         '200':
-          description: File deleted successfully
+          description: '200'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FileDeleteResponse'
-  /files/{id}/content:
-    get:
-      tags: ['Files']
-      summary: Get file contents
-      description: Get the contents of a single uploaded data file.
+                $ref: '#/components/schemas/ChatCompletionResponse'
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/ChatCompletionStream'
+        '400':
+          description: 'BadRequest'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '401':
+          description: 'Unauthorized'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '404':
+          description: 'NotFound'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '429':
+          description: 'RateLimit'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '503':
+          description: 'Overloaded'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '504':
+          description: 'Timeout'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+      deprecated: false
+  /completions:
+    post:
+      tags: ['Completion']
+      summary: Create completion
+      description: Query a language, code, or image model.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1313,9 +1203,13 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            file = client.files.retrieve_content(id="file-id")
+            response = client.completions.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                prompt="The largest city in France is",
+                max_tokens=1
+            )
 
-            print(file.filename)
+            print(response.choices[0].text)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1326,9 +1220,13 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            with client.files.with_streaming_response.content(id="file-id") as response:
-              for line in response.iter_lines():
-                print(line)
+            response = client.completions.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                prompt="The largest city in France is",
+                max_tokens=1
+            )
+
+            print(response.choices[0].text)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1338,10 +1236,13 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.files.content("file-id");
-            const content = await response.text();
+            const response = await client.completions.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+              prompt: "The largest city in France is",
+              max_tokens: 1,
+            });
 
-            console.log(content);
+            console.log(response.choices[0].text);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -1351,40 +1252,82 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.files.content("file-id");
-            const content = await response.text();
+            const response = await client.completions.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+              prompt: "The largest city in France is",
+              max_tokens: 1
+            });
 
-            console.log(content);
+            console.log(response.choices[0].text);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/files/file-id/content" \
+            curl -X POST "https://api.together.xyz/v1/completions" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      parameters:
-        - name: id
-          in: path
-          required: true
-          schema:
-            type: string
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                   "prompt": "The largest city in France is",
+                   "max_tokens": 1
+                 }'
+      operationId: completions
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CompletionRequest'
       responses:
         '200':
-          description: File content retrieved successfully
+          description: '200'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FileObject'
-        '500':
-          description: Internal Server Error
+                $ref: '#/components/schemas/CompletionResponse'
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/CompletionStream'
+        '400':
+          description: 'BadRequest'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-  /files/upload:
+        '401':
+          description: 'Unauthorized'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '404':
+          description: 'NotFound'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '429':
+          description: 'RateLimit'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '503':
+          description: 'Overloaded'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '504':
+          description: 'Timeout'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+      deprecated: false
+  /embeddings:
     post:
-      tags: ['Files']
-      summary: Upload a file
-      description: Upload a file with specified purpose, file name, and file type.
+      tags: ['Embeddings']
+      summary: Create embedding
+      description: Query an embedding model for a given string of text.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1397,11 +1340,12 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            current_dir = os.path.dirname(os.path.abspath(__file__))
-            file_path = os.path.join(current_dir, "data.jsonl")
-            file = client.files.upload(file=file_path)
+            response = client.embeddings.create(
+                model="BAAI/bge-large-en-v1.5",
+                input="New York City",
+            )
 
-            print(file.id)
+            print(response.data[0].embedding)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1412,98 +1356,214 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            current_dir = os.path.dirname(os.path.abspath(__file__))
-            file_path = os.path.join(current_dir, "data.jsonl")
-            file = client.files.upload(file=file_path)
+            response = client.embeddings.create(
+                model="BAAI/bge-large-en-v1.5",
+                input="New York City",
+            )
 
-            print(file.id)
+            print(response.data[0].embedding)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
-            import { upload } from "together-ai/lib/upload"
-            import path from "path";
-            import { fileURLToPath } from "url";
+            import Together from "together-ai";
 
-            const __filename = fileURLToPath(import.meta.url);
-            const __dirname = path.dirname(__filename);
-            const filepath = path.join(__dirname, "data.jsonl");
-            const file = await upload(filepath);
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
 
-            console.log(file.id);
+            const response = await client.embeddings.create({
+              model: "BAAI/bge-large-en-v1.5",
+              input: "New York City",
+            });
+
+            console.log(response.data[0].embedding);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
-            import { upload } from "together-ai/lib/upload"
-            import path from "path";
-            import { fileURLToPath } from "url";
+            import Together from "together-ai";
 
-            const __filename = fileURLToPath(import.meta.url);
-            const __dirname = path.dirname(__filename);
-            const filepath = path.join(__dirname, "data.jsonl");
-            const file = await upload(filepath);
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
 
-            console.log(file.id);
+            const response = await client.embeddings.create({
+              model: "BAAI/bge-large-en-v1.5",
+              input: "New York City",
+            });
+
+            console.log(response.data[0].embedding);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/files/upload" \
+            curl -X POST "https://api.together.xyz/v1/embeddings" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -F "file=@/path/to/data.jsonl" \
-                 -F "file_name=data.jsonl" \
-                 -F "purpose=fine-tune"
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "model": "BAAI/bge-large-en-v1.5",
+                   "input": "New York City"
+                 }'
+      operationId: embeddings
       requestBody:
-        required: true
         content:
-          multipart/form-data:
+          application/json:
             schema:
-              type: object
-              required:
-                - purpose
-                - file_name
-                - file
-              properties:
-                purpose:
-                  $ref: '#/components/schemas/FilePurpose'
-                file_name:
-                  type: string
-                  description: The name of the file being uploaded
-                  example: 'dataset.csv'
-                file_type:
-                  $ref: '#/components/schemas/FileType'
-                file:
-                  type: string
-                  format: binary
-                  description: The content of the file being uploaded
+              $ref: '#/components/schemas/EmbeddingsRequest'
       responses:
         '200':
-          description: File uploaded successfully
+          description: '200'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FileResponse'
-        '500':
-          description: Internal Server Error
+                $ref: '#/components/schemas/EmbeddingsResponse'
+        '400':
+          description: 'BadRequest'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '401':
+          description: 'Unauthorized'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '404':
+          description: 'NotFound'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '429':
+          description: 'RateLimit'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '503':
+          description: 'Overloaded'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '504':
+          description: 'Timeout'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
+      deprecated: false
+  /models:
+    get:
+      tags: ['Models']
+      summary: List all models
+      description: Lists all of Together's open-source models
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            models = client.models.list()
+
+            for model in models:
+                print(model.id)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            models = client.models.list()
+
+            for model in models:
+                print(model.id)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const models = await client.models.list();
+
+            for (const model of models) {
+              console.log(model.id);
+            }
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const models = await client.models.list();
+
+            for (const model of models) {
+              console.log(model.id);
+            }
+        - lang: Shell
+          label: cURL
+          source: |
+            curl "https://api.together.xyz/v1/models" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+      operationId: models
+      responses:
+        '200':
+          description: '200'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ModelInfoList'
         '400':
-          description: Bad Request
+          description: 'BadRequest'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
         '401':
-          description: Unauthorized
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-  /fine-tunes:
+        '404':
+          description: 'NotFound'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '429':
+          description: 'RateLimit'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '504':
+          description: 'Timeout'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+      deprecated: false
     post:
-      tags: ['Fine-tuning']
-      summary: Create job
-      description: Create a fine-tuning job with the provided model and training data.
+      tags: ['Models']
+      summary: Upload a custom model or adapter
+      description: Upload a custom model or adapter from Hugging Face or S3
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1516,12 +1576,12 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
-                training_file="file-id"
+            response = client.models.upload(
+                "My-Fine-Tuned-Model",
+                "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
             )
 
-            print(response)
+            print(response.job_id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1532,12 +1592,12 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
-                training_file="file-id"
+            response = client.models.upload(
+                model_name="My-Fine-Tuned-Model",
+                model_source="https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
             )
 
-            print(response)
+            print(response.data.job_id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1547,10 +1607,10 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
-              training_file: "file-id",
-            });
+            const response = await client.models.upload({
+              model_name: "My-Fine-Tuned-Model",
+              model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
+            })
 
             console.log(response);
         - lang: JavaScript
@@ -1562,224 +1622,78 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
-              training_file: "file-id",
-            });
+            const response = await client.models.upload({
+              model_name: "My-Fine-Tuned-Model",
+              model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
+            })
 
             console.log(response);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/fine-tunes" \
+            curl -X POST "https://api.together.xyz/v1/models" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json" \
                  -d '{
-                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
-                   "training_file": "file-id"
-                 }'
+                    "model_name": "My-Fine-Tuned-Model",
+                    "model_source": "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz"
+                  }'
+      operationId: uploadModel
       requestBody:
         required: true
         content:
           application/json:
             schema:
-              type: object
-              required:
-                - training_file
-                - model
-              properties:
-                training_file:
-                  type: string
-                  description: File-ID of a training file uploaded to the Together API
-                validation_file:
-                  type: string
-                  description: File-ID of a validation file uploaded to the Together API
-                model:
-                  type: string
-                  description: Name of the base model to run fine-tune job on
-                n_epochs:
-                  type: integer
-                  default: 1
-                  description: Number of complete passes through the training dataset (higher values may improve results but increase cost and risk of overfitting)
-                n_checkpoints:
-                  type: integer
-                  default: 1
-                  description: Number of intermediate model versions saved during training for evaluation
-                n_evals:
-                  type: integer
-                  default: 0
-                  description: Number of evaluations to be run on a given validation set during training
-                batch_size:
-                  oneOf:
-                    - type: integer
-                    - type: string
-                      enum:
-                        - max
-                  default: 'max'
-                  description: Number of training examples processed together (larger batches use more memory but may train faster). Defaults to "max". We use training optimizations like packing, so the effective batch size may be different than the value you set.
-                learning_rate:
-                  type: number
-                  format: float
-                  default: 0.00001
-                  description: Controls how quickly the model adapts to new information (too high may cause instability, too low may slow convergence)
-                lr_scheduler:
-                  type: object
-                  default: none
-                  $ref: '#/components/schemas/LRScheduler'
-                  description: The learning rate scheduler to use. It specifies how the learning rate is adjusted during training.
-                warmup_ratio:
-                  type: number
-                  format: float
-                  default: 0.0
-                  description: The percent of steps at the start of training to linearly increase the learning rate.
-                max_grad_norm:
-                  type: number
-                  format: float
-                  default: 1.0
-                  description: Max gradient norm to be used for gradient clipping. Set to 0 to disable.
-                weight_decay:
-                  type: number
-                  format: float
-                  default: 0.0
-                  description: Weight decay. Regularization parameter for the optimizer.
-                suffix:
-                  type: string
-                  description: Suffix that will be added to your fine-tuned model name
-                wandb_api_key:
-                  type: string
-                  description: Integration key for tracking experiments and model metrics on W&B platform
-                wandb_base_url:
-                  type: string
-                  description: The base URL of a dedicated Weights & Biases instance.
-                wandb_project_name:
-                  type: string
-                  description: The Weights & Biases project for your run. If not specified, will use `together` as the project name.
-                wandb_name:
-                  type: string
-                  description: The Weights & Biases name for your run.
-                train_on_inputs:
-                  oneOf:
-                    - type: boolean
-                    - type: string
-                      enum:
-                        - auto
-                  type: boolean
-                  default: auto
-                  description: Whether to mask the user messages in conversational data or prompts in instruction data.
-                  deprecated: true
-                training_method:
-                  type: object
-                  oneOf:
-                    - $ref: '#/components/schemas/TrainingMethodSFT'
-                    - $ref: '#/components/schemas/TrainingMethodDPO'
-                  description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization.
-                training_type:
-                  type: object
-                  oneOf:
-                    - $ref: '#/components/schemas/FullTrainingType'
-                    - $ref: '#/components/schemas/LoRATrainingType'
-                from_checkpoint:
-                  type: string
-                  description: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the final checkpoint will be used.
-                from_hf_model:
-                  type: string
-                  description: The Hugging Face Hub repo to start training from. Should be as close as possible to the base model (specified by the `model` argument) in terms of architecture and size.
-                hf_model_revision:
-                  type: string
-                  description: The revision of the Hugging Face Hub model to continue training from. E.g., hf_model_revision=main (default, used if the argument is not provided) or hf_model_revision='607a30d783dfa663caf39e06633721c8d4cfcd7e' (specific commit).
-                hf_api_token:
-                  type: string
-                  description: The API token for the Hugging Face Hub.
-                hf_output_repo_name:
-                  type: string
-                  description: The name of the Hugging Face repository to upload the fine-tuned model to.
+              $ref: '#/components/schemas/ModelUploadRequest'
       responses:
         '200':
-          description: Fine-tuning job initiated successfully
+          description: Model / adapter upload job created successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FinetuneResponseTruncated'
-    get:
-      tags: ['Fine-tuning']
-      summary: List all jobs
-      description: List the metadata for all fine-tuning jobs. Returns a list of FinetuneResponseTruncated objects.
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.fine_tuning.list()
-
-            for fine_tune in response.data:
-                print(f"ID: {fine_tune.id}, Status: {fine_tune.status}")
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.fine_tuning.list()
-
-            for fine_tune in response.data:
-                print(f"ID: {fine_tune.id}, Status: {fine_tune.status}")
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.fineTuning.list();
-
-            for (const fineTune of response.data) {
-              console.log(fineTune.id, fineTune.status);
-            }
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.fineTuning.list();
+                $ref: '#/components/schemas/ModelUploadSuccessResponse'
 
-            for (const fineTune of response.data) {
-              console.log(fineTune.id, fineTune.status);
-            }
-        - lang: Shell
-          label: cURL
-          source: |
-            curl "https://api.together.xyz/v1/fine-tunes" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
+  /jobs/{jobId}:
+    get:
+      tags: ['Jobs']
+      summary: Get job status
+      description: Get the status of a specific job
+      operationId: getJob
+      parameters:
+        - name: jobId
+          in: path
+          required: true
+          schema:
+            type: string
+          description: The ID of the job to retrieve
+          example: job-a15dad11-8d8e-4007-97c5-a211304de284
       responses:
         '200':
-          description: List of fine-tune jobs
+          description: Job status retrieved successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FinetuneTruncatedList'
-  /fine-tunes/{id}:
+                $ref: '#/components/schemas/JobInfoSuccessResponse'
+
+  /jobs:
     get:
-      tags: ['Fine-tuning']
-      summary: List job
-      description: List the metadata for a single fine-tuning job.
+      tags: ['Jobs']
+      summary: List all jobs
+      description: List all jobs and their statuses
+      operationId: listJobs
+      responses:
+        '200':
+          description: Jobs retrieved successfully
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/JobsInfoSuccessResponse'
+
+  /images/generations:
+    post:
+      tags: ['Images']
+      summary: Create image
+      description: Use an image model to generate an image for a given prompt.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1792,9 +1706,13 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            fine_tune = client.fine_tuning.retrieve(id="ft-id")
+            response = client.images.generate(
+                model="black-forest-labs/FLUX.1-schnell",
+                steps=4,
+                prompt="A cartoon of an astronaut riding a horse on the moon",
+            )
 
-            print(fine_tune)
+            print(response.data[0].url)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1805,9 +1723,13 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            fine_tune = client.fine_tuning.retrieve(id="ft-id")
+            response = client.images.generate(
+                model="black-forest-labs/FLUX.1-schnell",
+                steps=4,
+                prompt="A cartoon of an astronaut riding a horse on the moon",
+            )
 
-            print(fine_tune)
+            print(response.data[0].url)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1817,9 +1739,12 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const fineTune = await client.fineTuning.retrieve("ft-id");
+            const response = await client.images.generate({
+              model: "black-forest-labs/FLUX.1-schnell",
+              prompt: "A cartoon of an astronaut riding a horse on the moon",
+            });
 
-            console.log(fineTune);
+            console.log(response.data[0].url);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -1829,32 +1754,126 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const fineTune = await client.fineTuning.retrieve("ft-id");
+            const response = await client.images.generate({
+              model: "black-forest-labs/FLUX.1-schnell",
+              prompt: "A cartoon of an astronaut riding a horse on the moon",
+            });
 
-            console.log(fineTune);
+            console.log(response.data[0].url);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/fine-tunes/ft-id" \
+            curl -X POST "https://api.together.xyz/v1/images/generations" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      parameters:
-        - name: id
-          in: path
-          required: true
-          schema:
-            type: string
-      responses:
-        '200':
-          description: Fine-tune job details retrieved successfully
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/FinetuneResponse'
-    delete:
-      tags: ['Fine-tuning']
-      summary: Delete a fine-tune job
-      description: Delete a fine-tuning job.
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "model": "black-forest-labs/FLUX.1-schnell",
+                   "prompt": "A cartoon of an astronaut riding a horse on the moon"
+                 }'
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              required:
+                - prompt
+                - model
+              properties:
+                prompt:
+                  type: string
+                  description: A description of the desired images. Maximum length varies by model.
+                  example: cat floating in space, cinematic
+                model:
+                  type: string
+                  description: >
+                    The model to use for image generation.<br>
+                    <br>
+                    [See all of Together AI's image models](https://docs.together.ai/docs/serverless-models#image-models)
+                  example: black-forest-labs/FLUX.1-schnell
+                  anyOf:
+                    - type: string
+                      enum:
+                        - black-forest-labs/FLUX.1-schnell-Free
+                        - black-forest-labs/FLUX.1-schnell
+                        - black-forest-labs/FLUX.1.1-pro
+                    - type: string
+                steps:
+                  type: integer
+                  default: 20
+                  description: Number of generation steps.
+                image_url:
+                  type: string
+                  description: URL of an image to use for image models that support it.
+                seed:
+                  type: integer
+                  description: Seed used for generation. Can be used to reproduce image generations.
+                n:
+                  type: integer
+                  default: 1
+                  description: Number of image results to generate.
+                height:
+                  type: integer
+                  default: 1024
+                  description: Height of the image to generate in number of pixels.
+                width:
+                  type: integer
+                  default: 1024
+                  description: Width of the image to generate in number of pixels.
+                negative_prompt:
+                  type: string
+                  description: The prompt or prompts describing what should not appear in the generated image.
+                response_format:
+                  type: string
+                  description: Format of the image response. Can be either a base64 string or a URL.
+                  enum:
+                    - base64
+                    - url
+                guidance_scale:
+                  type: number
+                  description: Adjusts the alignment of the generated image with the input prompt. Higher values (e.g., 8-10) make the output more faithful to the prompt, while lower values (e.g., 1-5) encourage more creative freedom.
+                  default: 3.5
+                output_format:
+                  type: string
+                  description: The format of the image response. Can be either `jpeg` or `png`. Defaults to `jpeg`.
+                  default: jpeg
+                  enum:
+                    - jpeg
+                    - png
+                image_loras:
+                  description: An array of objects that define LoRAs (Low-Rank Adaptations) to influence the generated image.
+                  type: array
+                  items:
+                    type: object
+                    required: [path, scale]
+                    properties:
+                      path:
+                        type: string
+                        description: The URL of the LoRA to apply (e.g. https://huggingface.co/strangerzonehf/Flux-Midjourney-Mix2-LoRA).
+                      scale:
+                        type: number
+                        description: The strength of the LoRA's influence. Most LoRAs recommend a value of 1.
+                reference_images:
+                  description: An array of image URLs that guide the overall appearance and style of the generated image. These reference images influence the visual characteristics consistently across the generation.
+                  type: array
+                  items:
+                    type: string
+                    description: URL of a reference image to guide the image generation.
+                disable_safety_checker:
+                  type: boolean
+                  description: If true, disables the safety checker for image generation.
+      responses:
+        '200':
+          description: Image generated successfully
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ImageResponse'
+  /files:
+    get:
+      tags: ['Files']
+      summary: List all files
+      description: List the metadata for all uploaded data files.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1867,9 +1886,10 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.delete(id="ft-id")
+            response = client.files.list()
 
-            print(response)
+            for file in response.data:
+                print(file.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1880,9 +1900,10 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.delete(id="ft-id")
+            response = client.files.list()
 
-            print(response)
+            for file in response.data:
+                print(file.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1892,9 +1913,11 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.delete("ft-id");
+            const response = await client.files.list();
 
-            console.log(response);
+            for (const file of response.data) {
+              console.log(file.id);
+            }
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -1904,50 +1927,29 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.delete("ft-id");
+            const response = await client.files.list();
 
-            console.log(response);
+            for (const file of response.data) {
+              console.log(file.id);
+            }
         - lang: Shell
           label: cURL
           source: |
-            curl -X "DELETE" "https://api.together.xyz/v1/fine-tunes/ft-id?force=false" \
+            curl "https://api.together.xyz/v1/files" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json"
-      parameters:
-        - name: id
-          in: path
-          required: true
-          schema:
-            type: string
-        - name: force
-          in: query
-          schema:
-            type: boolean
-            default: false
       responses:
         '200':
-          description: Fine-tune job deleted successfully
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/FinetuneDeleteResponse'
-        '404':
-          description: Fine-tune job not found
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '500':
-          description: Internal server error
+          description: List of files
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-  /fine-tunes/{id}/events:
+                $ref: '#/components/schemas/FileList'
+  /files/{id}:
     get:
-      tags: ['Fine-tuning']
-      summary: List job events
-      description: List the events for a single fine-tuning job.
+      tags: ['Files']
+      summary: List file
+      description: Retrieve the metadata for a single uploaded data file.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1960,9 +1962,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            events = client.fine_tuning.list_events(id="ft-id")
+            file = client.files.retrieve(id="file-id")
 
-            print(events)
+            print(file)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1973,10 +1975,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.list_events(id="ft-id")
+            file = client.files.retrieve(id="file-id")
 
-            for event in response.data:
-                print(event)
+            print(file)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1986,9 +1987,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const events = await client.fineTuning.listEvents("ft-id");
+            const file = await client.files.retrieve("file-id");
 
-            console.log(events);
+            console.log(file);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -1998,13 +1999,13 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const events = await client.fineTuning.listEvents("ft-id");
+            const file = await client.files.retrieve("file-id");
 
-            console.log(events);
+            console.log(file);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/fine-tunes/ft-id/events" \
+            curl "https://api.together.xyz/v1/files/ID" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json"
       parameters:
@@ -2015,16 +2016,15 @@ paths:
             type: string
       responses:
         '200':
-          description: List of fine-tune events
+          description: File retrieved successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FinetuneListEvents'
-  /fine-tunes/{id}/checkpoints:
-    get:
-      tags: ['Fine-tuning']
-      summary: List checkpoints
-      description: List the checkpoints for a single fine-tuning job.
+                $ref: '#/components/schemas/FileResponse'
+    delete:
+      tags: ['Files']
+      summary: Delete a file
+      description: Delete a previously uploaded data file.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -2037,9 +2037,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            checkpoints = client.fine_tuning.list_checkpoints(id="ft-id")
+            response = client.files.delete(id="file-id")
 
-            print(checkpoints)
+            print(response)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -2050,9 +2050,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            checkpoints = client.fine_tuning.list_checkpoints(id="ft-id")
+            response = client.files.delete(id="file-id")
 
-            print(checkpoints)
+            print(response)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -2062,9 +2062,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const checkpoints = await client.fineTuning.listCheckpoints("ft-id");
+            const response = await client.files.delete("file-id");
 
-            console.log(checkpoints);
+            console.log(response);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -2074,15 +2074,14 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const checkpoints = await client.fineTuning.listCheckpoints("ft-id");
+            const response = await client.files.delete("file-id");
 
-            console.log(checkpoints);
+            console.log(response);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/fine-tunes/ft-id/checkpoints" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
+            curl -X "DELETE" "https://api.together.xyz/v1/files/file-id" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY"
       parameters:
         - name: id
           in: path
@@ -2091,16 +2090,16 @@ paths:
             type: string
       responses:
         '200':
-          description: List of fine-tune checkpoints
+          description: File deleted successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FinetuneListCheckpoints'
-  /finetune/download:
+                $ref: '#/components/schemas/FileDeleteResponse'
+  /files/{id}/content:
     get:
-      tags: ['Fine-tuning']
-      summary: Download model
-      description: Receive a compressed fine-tuned model or checkpoint.
+      tags: ['Files']
+      summary: Get file contents
+      description: Get the contents of a single uploaded data file.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -2113,10 +2112,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            # This will download the content to a location on disk
-            response = client.fine_tuning.download(id="ft-id")
+            file = client.files.retrieve_content(id="file-id")
 
-            print(response)
+            print(file.filename)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -2127,13 +2125,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            # Using `with_streaming_response` gives you control to do what you want with the response.
-            stream = client.fine_tuning.with_streaming_response.content(ft_id="ft-id")
-
-            with stream as response:
-                for line in response.iter_lines():
-                    print(line)
-
+            with client.files.with_streaming_response.content(id="file-id") as response:
+              for line in response.iter_lines():
+                print(line)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -2143,11 +2137,10 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.content({
-              ft_id: "ft-id",
-            });
+            const response = await client.files.content("file-id");
+            const content = await response.text();
 
-            console.log(await response.blob());
+            console.log(content);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -2157,56 +2150,40 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.content({
-              ft_id: "ft-id",
-            });
+            const response = await client.files.content("file-id");
+            const content = await response.text();
 
-            console.log(await response.blob());
+            console.log(content);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/finetune/download?ft_id=ft-id&checkpoint=merged"
+            curl "https://api.together.xyz/v1/files/file-id/content" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json"
       parameters:
-        - in: query
-          name: ft_id
-          schema:
-            type: string
+        - name: id
+          in: path
           required: true
-          description: Fine-tune ID to download. A string that starts with `ft-`.
-        - in: query
-          name: checkpoint_step
-          schema:
-            type: integer
-          required: false
-          description: Specifies step number for checkpoint to download. Ignores `checkpoint` value if set.
-        - in: query
-          name: checkpoint
           schema:
             type: string
-            enum:
-              - merged
-              - adapter
-              - model_output_path
-          description: Specifies checkpoint type to download - `merged` vs `adapter`. This field is required if the checkpoint_step is not set.
       responses:
         '200':
-          description: Successfully downloaded the fine-tuned model or checkpoint.
+          description: File content retrieved successfully
           content:
-            application/octet-stream:
+            application/json:
               schema:
-                type: string
-                format: binary
-        '400':
-          description: Invalid request parameters.
-        '404':
-          description: Fine-tune ID not found.
-  /fine-tunes/{id}/cancel:
+                $ref: '#/components/schemas/FileObject'
+        '500':
+          description: Internal Server Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+  /files/upload:
     post:
-      tags: ['Fine-tuning']
-      summary: Cancel job
-      description: Cancel a currently running fine-tuning job. Returns a FinetuneResponseTruncated object.
+      tags: ['Files']
+      summary: Upload a file
+      description: Upload a file with the specified purpose, file name, and file type.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -2219,9 +2196,11 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.cancel(id="ft-id")
+            current_dir = os.path.dirname(os.path.abspath(__file__))
+            file_path = os.path.join(current_dir, "data.jsonl")
+            file = client.files.upload(file=file_path)
 
-            print(response)
+            print(file.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -2232,62 +2211,98 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.cancel(id="ft-id")
+            current_dir = os.path.dirname(os.path.abspath(__file__))
+            file_path = os.path.join(current_dir, "data.jsonl")
+            file = client.files.upload(file=file_path)
 
-            print(response)
+            print(file.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
+            import { upload } from "together-ai/lib/upload"
+            import path from "path";
+            import { fileURLToPath } from "url";
 
-            const response = await client.fineTuning.cancel("ft-id");
+            const __filename = fileURLToPath(import.meta.url);
+            const __dirname = path.dirname(__filename);
+            const filepath = path.join(__dirname, "data.jsonl");
+            const file = await upload(filepath);
 
-            console.log(response);
+            console.log(file.id);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
+            import { upload } from "together-ai/lib/upload"
+            import path from "path";
+            import { fileURLToPath } from "url";
 
-            const response = await client.fineTuning.cancel("ft-id");
+            const __filename = fileURLToPath(import.meta.url);
+            const __dirname = path.dirname(__filename);
+            const filepath = path.join(__dirname, "data.jsonl");
+            const file = await upload(filepath);
 
-            console.log(response);
+            console.log(file.id);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/fine-tunes/ft-id/cancel" \
+            curl "https://api.together.xyz/v1/files/upload" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      parameters:
-        - in: path
-          name: id
-          schema:
-            type: string
-          required: true
-          description: Fine-tune ID to cancel. A string that starts with `ft-`.
+                 -F "file=@/path/to/data.jsonl" \
+                 -F "file_name=data.jsonl" \
+                 -F "purpose=fine-tune"
+      requestBody:
+        required: true
+        content:
+          multipart/form-data:
+            schema:
+              type: object
+              required:
+                - purpose
+                - file_name
+                - file
+              properties:
+                purpose:
+                  $ref: '#/components/schemas/FilePurpose'
+                file_name:
+                  type: string
+                  description: The name of the file being uploaded
+                  example: 'dataset.csv'
+                file_type:
+                  $ref: '#/components/schemas/FileType'
+                file:
+                  type: string
+                  format: binary
+                  description: The content of the file being uploaded
       responses:
         '200':
-          description: Successfully cancelled the fine-tuning job.
+          description: File uploaded successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FinetuneResponseTruncated'
+                $ref: '#/components/schemas/FileResponse'
+        '500':
+          description: Internal Server Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
         '400':
-          description: Invalid request parameters.
-        '404':
-          description: Fine-tune ID not found.
-  /rerank:
+          description: Bad Request
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '401':
+          description: Unauthorized
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+  /fine-tunes:
     post:
-      tags: ['Rerank']
-      summary: Create a rerank request
-      description: Query a reranker model
+      tags: ['Fine-tuning']
+      summary: Create job
+      description: Create a fine-tuning job with the provided model and training data.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -2300,35 +2315,12 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            documents = [
-                {
-                    "title": "Llama",
-                    "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
-                },
-                {
-                    "title": "Panda",
-                    "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
-                },
-                {
-                    "title": "Guanaco",
-                    "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
-                },
-                {
-                    "title": "Wild Bactrian camel",
-                    "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
-                }
-            ]
-
-            response = client.rerank.create(
-                model="Salesforce/Llama-Rank-v1",
-                query="What animals can I find near Peru?",
-                documents=documents,
+            response = client.fine_tuning.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
+                training_file="file-id"
             )
 
-            for result in response.results:
-                print(f"Rank: {result.index + 1}")
-                print(f"Title: {documents[result.index]['title']}")
-                print(f"Text: {documents[result.index]['text']}")
+            print(response)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -2339,35 +2331,12 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            documents = [
-                {
-                    "title": "Llama",
-                    "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
-                },
-                {
-                    "title": "Panda",
-                    "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
-                },
-                {
-                    "title": "Guanaco",
-                    "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
-                },
-                {
-                    "title": "Wild Bactrian camel",
-                    "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
-                }
-            ]
-
-            response = client.rerank.create(
-                model="Salesforce/Llama-Rank-v1",
-                query="What animals can I find near Peru?",
-                documents=documents,
+            response = client.fine_tuning.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
+                training_file="file-id"
             )
 
-            for result in response.results:
-                print(f"Rank: {result.index + 1}")
-                print(f"Title: {documents[result.index]['title']}")
-                print(f"Text: {documents[result.index]['text']}")
+            print(response)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -2377,34 +2346,12 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const documents = [{
-              "title": "Llama",
-              "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
-            },
-            {
-              "title": "Panda",
-              "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
-            },
-            {
-              "title": "Guanaco",
-              "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
-            },
-            {
-              "title": "Wild Bactrian camel",
-              "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
-            }];
-
-            const response = await client.rerank.create({
-              model: "Salesforce/Llama-Rank-v1",
-              query: "What animals can I find near Peru?",
-              documents,
+            const response = await client.fineTuning.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
+              training_file: "file-id",
             });
 
-            for (const result of response.results) {
-              console.log(`Rank: ${result.index + 1}`);
-              console.log(`Title: ${documents[result.index].title}`);
-              console.log(`Text: ${documents[result.index].text}`);
-            }
+            console.log(response);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -2414,115 +2361,148 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const documents = [{
-              "title": "Llama",
-              "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
-            },
-            {
-              "title": "Panda",
-              "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
-            },
-            {
-              "title": "Guanaco",
-              "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
-            },
-            {
-              "title": "Wild Bactrian camel",
-              "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
-            }];
-
-            const response = await client.rerank.create({
-              model: "Salesforce/Llama-Rank-v1",
-              query: "What animals can I find near Peru?",
-              documents,
+            const response = await client.fineTuning.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
+              training_file: "file-id",
             });
 
-            for (const result of response.results) {
-              console.log(`Rank: ${result.index + 1}`);
-              console.log(`Title: ${documents[result.index].title}`);
-              console.log(`Text: ${documents[result.index].text}`);
-            }
+            console.log(response);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/rerank" \
+            curl -X POST "https://api.together.xyz/v1/fine-tunes" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json" \
                  -d '{
-                   "model": "Salesforce/Llama-Rank-v1",
-                   "query": "What animals can I find near Peru?",
-                   "documents": [{
-                      "title": "Llama",
-                      "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
-                    },
-                    {
-                      "title": "Panda",
-                      "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
-                    },
-                    {
-                      "title": "Guanaco",
-                      "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
-                    },
-                    {
-                      "title": "Wild Bactrian camel",
-                      "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
-                    }]
+                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
+                   "training_file": "file-id"
                  }'
-      operationId: rerank
       requestBody:
+        required: true
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/RerankRequest'
+              type: object
+              required:
+                - training_file
+                - model
+              properties:
+                training_file:
+                  type: string
+                  description: File-ID of a training file uploaded to the Together API
+                validation_file:
+                  type: string
+                  description: File-ID of a validation file uploaded to the Together API
+                model:
+                  type: string
+                  description: Name of the base model to run fine-tune job on
+                n_epochs:
+                  type: integer
+                  default: 1
+                  description: Number of complete passes through the training dataset (higher values may improve results but increase cost and risk of overfitting)
+                n_checkpoints:
+                  type: integer
+                  default: 1
+                  description: Number of intermediate model versions saved during training for evaluation
+                n_evals:
+                  type: integer
+                  default: 0
+                  description: Number of evaluations to be run on a given validation set during training
+                batch_size:
+                  oneOf:
+                    - type: integer
+                    - type: string
+                      enum:
+                        - max
+                  default: 'max'
+                  description: Number of training examples processed together (larger batches use more memory but may train faster). Defaults to "max". We use training optimizations like packing, so the effective batch size may be different than the value you set.
+                learning_rate:
+                  type: number
+                  format: float
+                  default: 0.00001
+                  description: Controls how quickly the model adapts to new information (too high may cause instability, too low may slow convergence)
+                lr_scheduler:
+                  type: object
+                  allOf:  # wrap the $ref: OpenAPI 3.0 ignores keywords placed next to a bare $ref
+                    - $ref: '#/components/schemas/LRScheduler'
+                  description: The learning rate scheduler to use. It specifies how the learning rate is adjusted during training.
+                warmup_ratio:
+                  type: number
+                  format: float
+                  default: 0.0
+                  description: The fraction of training steps, at the start of training, over which the learning rate is linearly increased.
+                max_grad_norm:
+                  type: number
+                  format: float
+                  default: 1.0
+                  description: Max gradient norm to be used for gradient clipping. Set to 0 to disable.
+                weight_decay:
+                  type: number
+                  format: float
+                  default: 0.0
+                  description: Weight decay. Regularization parameter for the optimizer.
+                suffix:
+                  type: string
+                  description: Suffix that will be added to your fine-tuned model name
+                wandb_api_key:
+                  type: string
+                  description: Integration key for tracking experiments and model metrics on W&B platform
+                wandb_base_url:
+                  type: string
+                  description: The base URL of a dedicated Weights & Biases instance.
+                wandb_project_name:
+                  type: string
+                  description: The Weights & Biases project for your run. If not specified, will use `together` as the project name.
+                wandb_name:
+                  type: string
+                  description: The Weights & Biases name for your run.
+                train_on_inputs:
+                  # Accepts a boolean or the literal string "auto" (the default); no top-level type, since it would reject "auto".
+                  oneOf:
+                    - type: boolean
+                    - type: string
+                      enum:
+                        - auto
+                  default: auto
+                  description: Whether to mask the user messages in conversational data or prompts in instruction data.
+                  deprecated: true
+                training_method:
+                  type: object
+                  oneOf:
+                    - $ref: '#/components/schemas/TrainingMethodSFT'
+                    - $ref: '#/components/schemas/TrainingMethodDPO'
+                  description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization.
+                training_type:
+                  type: object
+                  oneOf:
+                    - $ref: '#/components/schemas/FullTrainingType'
+                    - $ref: '#/components/schemas/LoRATrainingType'
+                from_checkpoint:
+                  type: string
+                  description: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the final checkpoint will be used.
+                from_hf_model:
+                  type: string
+                  description: The Hugging Face Hub repo to start training from. Should be as close as possible to the base model (specified by the `model` argument) in terms of architecture and size.
+                hf_model_revision:
+                  type: string
+                  description: The revision of the Hugging Face Hub model to continue training from. E.g., hf_model_revision=main (default, used if the argument is not provided) or hf_model_revision='607a30d783dfa663caf39e06633721c8d4cfcd7e' (specific commit).
+                hf_api_token:
+                  type: string
+                  description: The API token for the Hugging Face Hub.
+                hf_output_repo_name:
+                  type: string
+                  description: The name of the Hugging Face repository to upload the fine-tuned model to.
       responses:
         '200':
-          description: '200'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/RerankResponse'
-        '400':
-          description: 'BadRequest'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: 'Unauthorized'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '404':
-          description: 'NotFound'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '503':
-          description: 'Overloaded'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '504':
-          description: 'Timeout'
+          description: Fine-tuning job initiated successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-      deprecated: false
-  /audio/speech:
-    post:
-      tags: ['Audio']
-      summary: Create audio generation request
-      description: Generate audio from input text
+                $ref: '#/components/schemas/FinetuneResponseTruncated'
+    get:
+      tags: ['Fine-tuning']
+      summary: List all jobs
+      description: List the metadata for all fine-tuning jobs. Returns a list of FinetuneResponseTruncated objects.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -2535,13 +2515,10 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.audio.speech.create(
-                model="cartesia/sonic-2",
-                input="The quick brown fox jumps over the lazy dog.",
-                voice="laidback woman",
-            )
+            response = client.fine_tuning.list()
 
-            response.stream_to_file("audio.wav")
+            for fine_tune in response.data:
+                print(f"ID: {fine_tune.id}, Status: {fine_tune.status}")
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -2552,545 +2529,224 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.audio.speech.with_streaming_response.create(
-                model="cartesia/sonic-2",
-                input="The quick brown fox jumps over the lazy dog.",
-                voice="laidback woman",
-            )
+            response = client.fine_tuning.list()
 
-            with response as stream:
-              stream.stream_to_file("audio.wav")
+            for fine_tune in response.data:
+                print(f"ID: {fine_tune.id}, Status: {fine_tune.status}")
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
             import Together from "together-ai";
-            import { createWriteStream } from "fs";
-            import { join } from "path";
-            import { pipeline } from "stream/promises";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.audio.speech.create({
-              model: "cartesia/sonic-2",
-              input: "The quick brown fox jumps over the lazy dog.",
-              voice: "laidback woman",
-            });
-
-            const filepath = join(process.cwd(), "audio.wav");
-            const writeStream = createWriteStream(filepath);
+            const response = await client.fineTuning.list();
 
-            if (response.body) {
-              await pipeline(response.body, writeStream);
+            for (const fineTune of response.data) {
+              console.log(fineTune.id, fineTune.status);
             }
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
             import Together from "together-ai";
-            import { createWriteStream } from "fs";
-            import { join } from "path";
-            import { pipeline } from "stream/promises";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.audio.speech.create({
-              model: "cartesia/sonic-2",
-              input: "The quick brown fox jumps over the lazy dog.",
-              voice: "laidback woman",
-            });
-
-            const filepath = join(process.cwd(), "audio.wav");
-            const writeStream = createWriteStream(filepath);
+            const response = await client.fineTuning.list();
 
-            if (response.body) {
-              await pipeline(response.body, writeStream);
+            for (const fineTune of response.data) {
+              console.log(fineTune.id, fineTune.status);
             }
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/audio/speech" \
+            curl "https://api.together.xyz/v1/fine-tunes" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "model": "cartesia/sonic-2",
-                   "input": "The quick brown fox jumps over the lazy dog.",
-                   "voice": "laidback woman"
-                 }' \
-                 --output audio.wav
-      operationId: audio-speech
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/AudioSpeechRequest'
+                 -H "Content-Type: application/json"
       responses:
         '200':
-          description: 'OK'
-          content:
-            application/octet-stream:
-              schema:
-                type: string
-                format: binary
-            audio/wav:
-              schema:
-                type: string
-                format: binary
-            audio/mpeg:
-              schema:
-                type: string
-                format: binary
-            text/event-stream:
-              schema:
-                $ref: '#/components/schemas/AudioSpeechStreamResponse'
-        '400':
-          description: 'BadRequest'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
+          description: List of fine-tune jobs
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-  /audio/speech/websocket:
+                $ref: '#/components/schemas/FinetuneTruncatedList'
+  /fine-tunes/{id}:
     get:
-      tags: ['Audio']
-      summary: Real-time text-to-speech via WebSocket
-      description: |
-        Establishes a WebSocket connection for real-time text-to-speech generation. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/audio/speech/websocket) for bidirectional streaming communication.
-
-        **Connection Setup:**
-        - Protocol: WebSocket (wss://)
-        - Authentication: Pass API key as Bearer token in Authorization header
-        - Parameters: Sent as query parameters (model, voice, max_partial_length)
-
-        **Client Events:**
-        - `tts_session.updated`: Update session parameters like voice
-          ```json
-          {
-            "type": "tts_session.updated",
-            "session": {
-              "voice": "tara"
-            }
-          }
-          ```
-        - `input_text_buffer.append`: Send text chunks for TTS generation
-          ```json
-          {
-            "type": "input_text_buffer.append",
-            "text": "Hello, this is a test."
-          }
-          ```
-        - `input_text_buffer.clear`: Clear the buffered text
-          ```json
-          {
-            "type": "input_text_buffer.clear"
-          }
-          ```
-        - `input_text_buffer.commit`: Signal end of text input and process remaining text
-          ```json
-          {
-            "type": "input_text_buffer.commit"
-          }
-          ```
-
-        **Server Events:**
-        - `session.created`: Initial session confirmation (sent first)
-          ```json
-          {
-            "event_id": "evt_123456",
-            "type": "session.created",
-            "session": {
-              "id": "session-id",
-              "object": "realtime.tts.session",
-              "modalities": ["text", "audio"],
-              "model": "hexgrad/Kokoro-82M",
-              "voice": "tara"
-            }
-          }
-          ```
-        - `conversation.item.input_text.received`: Acknowledgment that text was received
-          ```json
-          {
-            "type": "conversation.item.input_text.received",
-            "text": "Hello, this is a test."
-          }
-          ```
-        - `conversation.item.audio_output.delta`: Audio chunks as base64-encoded data
-          ```json
-          {
-            "type": "conversation.item.audio_output.delta",
-            "item_id": "tts_1",
-            "delta": "<base64_encoded_audio_chunk>"
-          }
-          ```
-        - `conversation.item.audio_output.done`: Audio generation complete for an item
-          ```json
-          {
-            "type": "conversation.item.audio_output.done",
-            "item_id": "tts_1"
-          }
-          ```
-        - `conversation.item.tts.failed`: Error occurred
-          ```json
-          {
-            "type": "conversation.item.tts.failed",
-            "error": {
-              "message": "Error description",
-              "type": "invalid_request_error",
-              "param": null,
-              "code": "invalid_api_key"
-            }
-          }
-          ```
-
-        **Text Processing:**
-        - Partial text (no sentence ending) is held in buffer until:
-          - We believe that the text is complete enough to be processed for TTS generation
-          - The partial text exceeds `max_partial_length` characters (default: 250)
-          - The `input_text_buffer.commit` event is received
+      tags: ['Fine-tuning']
+      summary: List job
+      description: Retrieve the metadata for a single fine-tuning job.
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
 
-        **Audio Format:**
-        - Format: WAV (PCM s16le)
-        - Sample Rate: 24000 Hz
-        - Encoding: Base64
-        - Delivered via `conversation.item.audio_output.delta` events
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
 
-        **Error Codes:**
-        - `invalid_api_key`: Invalid API key provided (401)
-        - `missing_api_key`: Authorization header missing (401)
-        - `model_not_available`: Invalid or unavailable model (400)
-        - Invalid text format errors (400)
+            fine_tune = client.fine_tuning.retrieve(id="ft-id")
 
-      operationId: realtime-tts
-      x-codeSamples:
+            print(fine_tune)
         - lang: Python
-          label: Python WebSocket Client
+          label: Together AI SDK (v2)
           source: |
-            import asyncio
-            import websockets
-            import json
-            import base64
+            from together import Together
             import os
 
-            async def generate_speech():
-                api_key = os.environ.get("TOGETHER_API_KEY")
-                url = "wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara"
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
 
-                headers = {
-                    "Authorization": f"Bearer {api_key}"
-                }
+            fine_tune = client.fine_tuning.retrieve(id="ft-id")
 
-                async with websockets.connect(url, additional_headers=headers) as ws:
-                    # Wait for session created
-                    session_msg = await ws.recv()
-                    session_data = json.loads(session_msg)
-                    print(f"Session created: {session_data['session']['id']}")
+            print(fine_tune)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
 
-                    # Send text for TTS
-                    text_chunks = [
-                        "Hello, this is a test.",
-                        "This is the second sentence.",
-                        "And this is the final one."
-                    ]
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
 
-                    async def send_text():
-                        for chunk in text_chunks:
-                            await ws.send(json.dumps({
-                                "type": "input_text_buffer.append",
-                                "text": chunk
-                            }))
-                            await asyncio.sleep(0.5)  # Simulate typing
+            const fineTune = await client.fineTuning.retrieve("ft-id");
 
-                        # Commit to process any remaining text
-                        await ws.send(json.dumps({
-                            "type": "input_text_buffer.commit"
-                        }))
+            console.log(fineTune);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
 
-                    async def receive_audio():
-                        audio_data = bytearray()
-                        async for message in ws:
-                            data = json.loads(message)
-                            
-                            if data["type"] == "conversation.item.input_text.received":
-                                print(f"Text received: {data['text']}")
-                            elif data["type"] == "conversation.item.audio_output.delta":
-                                # Decode base64 audio chunk
-                                audio_chunk = base64.b64decode(data['delta'])
-                                audio_data.extend(audio_chunk)
-                                print(f"Received audio chunk for item {data['item_id']}")
-                            elif data["type"] == "conversation.item.audio_output.done":
-                                print(f"Audio generation complete for item {data['item_id']}")
-                            elif data["type"] == "conversation.item.tts.failed":
-                                error = data.get("error", {})
-                                print(f"Error: {error.get('message')}")
-                                break
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
 
-                        # Save the audio to a file
-                        with open("output.wav", "wb") as f:
-                            f.write(audio_data)
-                        print("Audio saved to output.wav")
-
-                    # Run send and receive concurrently
-                    await asyncio.gather(send_text(), receive_audio())
+            const fineTune = await client.fineTuning.retrieve("ft-id");
 
-            asyncio.run(generate_speech())
-        - lang: JavaScript
-          label: Node.js WebSocket Client
+            console.log(fineTune);
+        - lang: Shell
+          label: cURL
           source: |
-            import WebSocket from 'ws';
-            import fs from 'fs';
-
-            const apiKey = process.env.TOGETHER_API_KEY;
-            const url = 'wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara';
-
-            const ws = new WebSocket(url, {
-              headers: {
-                'Authorization': `Bearer ${apiKey}`
-              }
-            });
-
-            const audioData = [];
-
-            ws.on('open', () => {
-              console.log('WebSocket connection established!');
-            });
-
-            ws.on('message', (data) => {
-              const message = JSON.parse(data.toString());
-
-              if (message.type === 'session.created') {
-                console.log(`Session created: ${message.session.id}`);
-                
-                // Send text chunks
-                const textChunks = [
-                  "Hello, this is a test.",
-                  "This is the second sentence.",
-                  "And this is the final one."
-                ];
-
-                textChunks.forEach((text, index) => {
-                  setTimeout(() => {
-                    ws.send(JSON.stringify({
-                      type: 'input_text_buffer.append',
-                      text: text
-                    }));
-                  }, index * 500);
-                });
-
-                // Commit after all chunks
-                setTimeout(() => {
-                  ws.send(JSON.stringify({
-                    type: 'input_text_buffer.commit'
-                  }));
-                }, textChunks.length * 500 + 100);
-
-              } else if (message.type === 'conversation.item.input_text.received') {
-                console.log(`Text received: ${message.text}`);
-              } else if (message.type === 'conversation.item.audio_output.delta') {
-                // Decode base64 audio chunk
-                const audioChunk = Buffer.from(message.delta, 'base64');
-                audioData.push(audioChunk);
-                console.log(`Received audio chunk for item ${message.item_id}`);
-              } else if (message.type === 'conversation.item.audio_output.done') {
-                console.log(`Audio generation complete for item ${message.item_id}`);
-              } else if (message.type === 'conversation.item.tts.failed') {
-                const errorMessage = message.error?.message ?? 'Unknown error';
-                console.error(`Error: ${errorMessage}`);
-                ws.close();
-              }
-            });
-
-            ws.on('close', () => {
-              // Save the audio to a file
-              if (audioData.length > 0) {
-                const completeAudio = Buffer.concat(audioData);
-                fs.writeFileSync('output.wav', completeAudio);
-                console.log('Audio saved to output.wav');
-              }
-            });
-
-            ws.on('error', (error) => {
-              console.error('WebSocket error:', error);
-            });
+            curl "https://api.together.xyz/v1/fine-tunes/ft-id" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
       parameters:
-        - in: query
-          name: model
-          required: false
-          schema:
-            type: string
-            enum:
-              - hexgrad/Kokoro-82M
-              - cartesia/sonic-english
-            default: hexgrad/Kokoro-82M
-          description: The TTS model to use for speech generation. Can also be set via `tts_session.updated` event.
-        - in: query
-          name: voice
-          required: false
+        - name: id
+          in: path
+          required: true
           schema:
             type: string
-            default: tara
-          description: |
-            The voice to use for speech generation. Default is 'tara'. 
-            Available voices vary by model. Can also be updated via `tts_session.updated` event.
-        - in: query
-          name: max_partial_length
-          required: false
-          schema:
-            type: integer
-            default: 250
-          description: |
-            Maximum number of characters in partial text before forcing TTS generation 
-            even without a sentence ending. Helps reduce latency for long text without punctuation.
       responses:
-        '101':
-          description: |
-            Switching Protocols - WebSocket connection established successfully.
-
-            Error message format:
-            ```json
-            {
-              "type": "conversation.item.tts.failed",
-              "error": {
-                "message": "Error description",
-                "type": "invalid_request_error",
-                "param": null,
-                "code": "error_code"
-              }
-            }
-            ```
-  /audio/transcriptions:
-    post:
-      tags: ['Audio']
-      summary: Create audio transcription request
-      description: Transcribes audio into text
+        '200':
+          description: Fine-tune job details retrieved successfully
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/FinetuneResponse'
+    delete:
+      tags: ['Fine-tuning']
+      summary: Delete a fine-tune job
+      description: Delete a fine-tuning job.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
           source: |
             # Docs for v2 can be found by changing the above selector ^
             from together import Together
+            import os
 
             client = Together(
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            file = open("audio.wav", "rb")
-
-            response = client.audio.transcriptions.create(
-                model="openai/whisper-large-v3",
-                file=file,
-            )
+            response = client.fine_tuning.delete(id="ft-id")
 
-            print(response.text)
+            print(response)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
             from together import Together
+            import os
 
             client = Together(
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            file = open("audio.wav", "rb")
-
-            response = client.audio.transcriptions.create(
-                model="openai/whisper-large-v3",
-                file=file,
-            )
+            response = client.fine_tuning.delete(id="ft-id")
 
-            print(response.text)
+            print(response)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
             import Together from "together-ai";
-            import { readFileSync } from "fs";
-            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const audioFilePath = join(process.cwd(), "audio.wav");
-            const audioBuffer = readFileSync(audioFilePath);
-            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
-
-            const response = await client.audio.transcriptions.create({
-              model: "openai/whisper-large-v3",
-              file: audioFile,
-            });
+            const response = await client.fineTuning.delete("ft-id");
 
-            console.log(response.text);
+            console.log(response);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
             import Together from "together-ai";
-            import { readFileSync } from "fs";
-            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const audioFilePath = join(process.cwd(), "audio.wav");
-            const audioBuffer = readFileSync(audioFilePath);
-            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
-
-            const response = await client.audio.transcriptions.create({
-              model: "openai/whisper-large-v3",
-              file: audioFile,
-            });
+            const response = await client.fineTuning.delete("ft-id");
 
-            console.log(response.text);
+            console.log(response);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/audio/transcriptions" \
+            curl -X "DELETE" "https://api.together.xyz/v1/fine-tunes/ft-id?force=false" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -F "file=@audio.wav" \
-                 -F "model=openai/whisper-large-v3"
-      operationId: audio-transcriptions
-      requestBody:
-        required: true
-        content:
-          multipart/form-data:
-            schema:
-              $ref: '#/components/schemas/AudioTranscriptionRequest'
+                 -H "Content-Type: application/json"
+      parameters:
+        - name: id
+          in: path
+          required: true
+          schema:
+            type: string
+        - name: force
+          in: query
+          schema:
+            type: boolean
+            default: false
       responses:
         '200':
-          description: 'OK'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/AudioTranscriptionResponse'
-        '400':
-          description: 'BadRequest'
+          description: Fine-tune job deleted successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: 'Unauthorized'
+                $ref: '#/components/schemas/FinetuneDeleteResponse'
+        '404':
+          description: Fine-tune job not found
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
+        '500':
+          description: Internal server error
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-  /audio/translations:
-    post:
-      tags: ['Audio']
-      summary: Create audio translation request
-      description: Translates audio into English
+  /fine-tunes/{id}/events:
+    get:
+      tags: ['Fine-tuning']
+      summary: List job events
+      description: List the events for a single fine-tuning job.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -3103,15 +2759,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            file = open("audio.wav", "rb")
+            events = client.fine_tuning.list_events(id="ft-id")
 
-            response = client.audio.translations.create(
-                model="openai/whisper-large-v3",
-                file=file,
-                language="es",
-            )
-
-            print(response.text)
+            print(events)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -3122,113 +2772,73 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            file = open("audio.wav", "rb")
-
-            response = client.audio.translations.create(
-                model="openai/whisper-large-v3",
-                file=file,
-                language="es",
-            )
+            response = client.fine_tuning.list_events(id="ft-id")
 
-            print(response.text)
+            for event in response.data:
+                print(event)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
             import Together from "together-ai";
-            import { readFileSync } from "fs";
-            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const audioFilePath = join(process.cwd(), "audio.wav");
-            const audioBuffer = readFileSync(audioFilePath);
-            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
-
-            const response = await client.audio.translations.create({
-              model: "openai/whisper-large-v3",
-              file: audioFile,
-              language: "es"
-            });
+            const events = await client.fineTuning.listEvents("ft-id");
 
-            console.log(response.text);
+            console.log(events);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
             import Together from "together-ai";
-            import { readFileSync } from "fs";
-            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const audioFilePath = join(process.cwd(), "audio.wav");
-            const audioBuffer = readFileSync(audioFilePath);
-            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
-
-            const response = await client.audio.translations.create({
-              model: "openai/whisper-large-v3",
-              file: audioFile,
-              language: "es"
-            });
+            const events = await client.fineTuning.listEvents("ft-id");
 
-            console.log(response.text);
+            console.log(events);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/audio/transcriptions" \
+            curl "https://api.together.xyz/v1/fine-tunes/ft-id/events" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -F "file=@audio.wav" \
-                 -F "model=openai/whisper-large-v3" \
-                 -F "language=es"
-      operationId: audio-translations
-      requestBody:
-        required: true
-        content:
-          multipart/form-data:
-            schema:
-              $ref: '#/components/schemas/AudioTranslationRequest'
+                 -H "Content-Type: application/json"
+      parameters:
+        - name: id
+          in: path
+          required: true
+          schema:
+            type: string
       responses:
         '200':
-          description: 'OK'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/AudioTranslationResponse'
-        '400':
-          description: 'BadRequest'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: 'Unauthorized'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
+          description: List of fine-tune events
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-  /clusters/availability-zones:
+                $ref: '#/components/schemas/FinetuneListEvents'
+  /fine-tunes/{id}/checkpoints:
     get:
-      tags: ['endpoints']
-      summary: List all available availability zones.
-      description: List all available availability zones.
-      operationId: availabilityZones
-      responses:
-        '200':
-          description: Success
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ListAvailibilityZonesResponse'
+      tags: ['Fine-tuning']
+      summary: List checkpoints
+      description: List the checkpoints for a single fine-tuning job.
       x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            checkpoints = client.fine_tuning.list_checkpoints(id="ft-id")
+
+            print(checkpoints)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -3239,9 +2849,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.endpoints.list_avzones()
+            checkpoints = client.fine_tuning.list_checkpoints(id="ft-id")
 
-            print(response.avzones)
+            print(checkpoints)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -3251,9 +2861,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.endpoints.listAvzones();
+            const checkpoints = await client.fineTuning.listCheckpoints("ft-id");
 
-            console.log(response.avzones);
+            console.log(checkpoints);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -3263,20 +2873,33 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.endpoints.listAvzones();
+            const checkpoints = await client.fineTuning.listCheckpoints("ft-id");
 
-            console.log(response.avzones);
+            console.log(checkpoints);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/clusters/availability-zones" \
+            curl "https://api.together.xyz/v1/fine-tunes/ft-id/checkpoints" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json"
-  /endpoints:
+      parameters:
+        - name: id
+          in: path
+          required: true
+          schema:
+            type: string
+      responses:
+        '200':
+          description: List of fine-tune checkpoints
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/FinetuneListCheckpoints'
+  /finetune/download:
     get:
-      tags: ['Endpoints']
-      summary: List all endpoints, can be filtered by type
-      description: Returns a list of all endpoints associated with your account. You can filter the results by type (dedicated or serverless).
+      tags: ['Fine-tuning']
+      summary: Download model
+      description: Receive a compressed fine-tuned model or checkpoint.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -3289,10 +2912,10 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            endpoints = client.endpoints.list()
+            # This will download the content to a location on disk
+            response = client.fine_tuning.download(id="ft-id")
 
-            for endpoint in endpoints:
-                print(endpoint.id)
+            print(response)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -3303,10 +2926,13 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.endpoints.list()
+            # Using `with_streaming_response` gives you control to do what you want with the response.
+            stream = client.fine_tuning.with_streaming_response.content(ft_id="ft-id")
+
+            with stream as response:
+                for line in response.iter_lines():
+                    print(line)
 
-            for endpoint in response.data:
-                print(endpoint.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -3316,11 +2942,11 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const endpoints = await client.endpoints.list();
+            const response = await client.fineTuning.content({
+              ft_id: "ft-id",
+            });
 
-            for (const endpoint of endpoints.data) {
-              console.log(endpoint);
-            }
+            console.log(await response.blob());
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -3330,91 +2956,56 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const endpoints = await client.endpoints.list();
+            const response = await client.fineTuning.content({
+              ft_id: "ft-id",
+            });
 
-            for (const endpoint of endpoints.data) {
-              console.log(endpoint);
-            }
+            console.log(await response.blob());
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/endpoints" \
+            curl "https://api.together.xyz/v1/finetune/download?ft_id=ft-id&checkpoint=merged" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json"
-      operationId: listEndpoints
       parameters:
-        - name: type
-          in: query
-          required: false
+        - in: query
+          name: ft_id
           schema:
             type: string
-            enum:
-              - dedicated
-              - serverless
-          description: Filter endpoints by type
-          example: dedicated
-        - name: usage_type
-          in: query
+          required: true
+          description: Fine-tune ID to download. A string that starts with `ft-`.
+        - in: query
+          name: checkpoint_step
+          schema:
+            type: integer
           required: false
+          description: Specifies step number for checkpoint to download. Ignores `checkpoint` value if set.
+        - in: query
+          name: checkpoint
           schema:
             type: string
             enum:
-              - on-demand
-              - reserved
-          description: Filter endpoints by usage type
-          example: on-demand
-        - name: mine
-          in: query
-          required: false
-          schema:
-            type: boolean
-          description: If true, return only endpoints owned by the caller
+              - merged
+              - adapter
+              - model_output_path
+          description: Specifies checkpoint type to download - `merged`, `adapter`, or `model_output_path`. This field is required if `checkpoint_step` is not set.
       responses:
         '200':
-          description: '200'
-          content:
-            application/json:
-              schema:
-                type: object
-                required:
-                  - object
-                  - data
-                properties:
-                  object:
-                    type: string
-                    enum:
-                      - list
-                  data:
-                    type: array
-                    items:
-                      $ref: '#/components/schemas/ListEndpoint'
-                example:
-                  object: 'list'
-                  data:
-                    - object: 'endpoint'
-                      id: 'endpoint-5c0c20db-62fe-4f41-8ffc-d9e4ea1a264e'
-                      name: 'allenai/OLMo-7B'
-                      model: 'allenai/OLMo-7B'
-                      type: 'serverless'
-                      owner: 'together'
-                      state: 'STARTED'
-                      created_at: '2024-02-28T21:34:35.444Z'
-        '403':
-          description: 'Unauthorized'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '500':
-          description: 'Internal error'
+          description: Successfully downloaded the fine-tuned model or checkpoint.
           content:
-            application/json:
+            application/octet-stream:
               schema:
-                $ref: '#/components/schemas/ErrorData'
+                type: string
+                format: binary
+        '400':
+          description: Invalid request parameters.
+        '404':
+          description: Fine-tune ID not found.
+  /fine-tunes/{id}/cancel:
     post:
-      tags: ['Endpoints']
-      summary: Create a dedicated endpoint, it will start automatically
-      description: Creates a new dedicated endpoint for serving models. The endpoint will automatically start after creation. You can deploy any supported model on hardware configurations that meet the model's requirements.
+      tags: ['Fine-tuning']
+      summary: Cancel job
+      description: Cancel a currently running fine-tuning job. Returns a FinetuneResponseTruncated object.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -3427,14 +3018,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            endpoint = client.endpoints.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                hardware="1x_nvidia_a100_80gb_sxm",
-                min_replicas=2,
-                max_replicas=5,
-            )
+            response = client.fine_tuning.cancel(id="ft-id")
 
-            print(endpoint.id)
+            print(response)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -3445,16 +3031,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            endpoint = client.endpoints.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                hardware="1x_nvidia_a100_80gb_sxm",
-                autoscaling={
-                  "min_replicas": 2,
-                  "max_replicas": 5,
-                }
-            )
+            response = client.fine_tuning.cancel(id="ft-id")
 
-            print(endpoint.id)
+            print(response)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -3464,16 +3043,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const endpoint = await client.endpoints.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-              hardware: "1x_nvidia_a100_80gb_sxm",
-              autoscaling: {
-                max_replicas: 5,
-                min_replicas: 2,
-              }
-            });
+            const response = await client.fineTuning.cancel("ft-id");
 
-            console.log(endpoint.id);
+            console.log(response);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -3483,62 +3055,38 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const endpoint = await client.endpoints.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-              hardware: "1x_nvidia_a100_80gb_sxm",
-              autoscaling: {
-                max_replicas: 5,
-                min_replicas: 2,
-              }
-            });
+            const response = await client.fineTuning.cancel("ft-id");
 
-            console.log(endpoint.id);
+            console.log(response);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/endpoints" \
+            curl -X POST "https://api.together.xyz/v1/fine-tunes/ft-id/cancel" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                   "hardware": "1x_nvidia_a100_80gb_sxm",
-                   "autoscaling": {
-                     "max_replicas": 5,
-                     "min_replicas": 2
-                   }
-                 }'
-      operationId: createEndpoint
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/CreateEndpointRequest'
+                 -H "Content-Type: application/json"
+      parameters:
+        - in: path
+          name: id
+          schema:
+            type: string
+          required: true
+          description: Fine-tune ID to cancel. A string that starts with `ft-`.
       responses:
         '200':
-          description: '200'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/DedicatedEndpoint'
-        '403':
-          description: 'Unauthorized'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '500':
-          description: 'Internal error'
+          description: Successfully cancelled the fine-tuning job.
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-
-  /endpoints/{endpointId}:
-    get:
-      tags: ['Endpoints']
-      summary: Get endpoint by ID
-      description: Retrieves details about a specific endpoint, including its current state, configuration, and scaling settings.
+                $ref: '#/components/schemas/FinetuneResponseTruncated'
+        '400':
+          description: Invalid request parameters.
+        '404':
+          description: Fine-tune ID not found.
+  /rerank:
+    post:
+      tags: ['Rerank']
+      summary: Create a rerank request
+      description: Query a reranker model
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -3551,9 +3099,35 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            endpoint = client.endpoints.get("endpoint-id")
+            documents = [
+                {
+                    "title": "Llama",
+                    "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
+                },
+                {
+                    "title": "Panda",
+                    "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
+                },
+                {
+                    "title": "Guanaco",
+                    "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
+                },
+                {
+                    "title": "Wild Bactrian camel",
+                    "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
+                }
+            ]
 
-            print(endpoint.id)
+            response = client.rerank.create(
+                model="Salesforce/Llama-Rank-v1",
+                query="What animals can I find near Peru?",
+                documents=documents,
+            )
+
+            for result in response.results:
+                print(f"Rank: {result.index + 1}")
+                print(f"Title: {documents[result.index]['title']}")
+                print(f"Text: {documents[result.index]['text']}")
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -3564,9 +3138,35 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            endpoint = client.endpoints.retrieve("endpoint-id")
+            documents = [
+                {
+                    "title": "Llama",
+                    "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
+                },
+                {
+                    "title": "Panda",
+                    "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
+                },
+                {
+                    "title": "Guanaco",
+                    "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
+                },
+                {
+                    "title": "Wild Bactrian camel",
+                    "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
+                }
+            ]
 
-            print(endpoint.id)
+            response = client.rerank.create(
+                model="Salesforce/Llama-Rank-v1",
+                query="What animals can I find near Peru?",
+                documents=documents,
+            )
+
+            for result in response.results:
+                print(f"Rank: {result.index + 1}")
+                print(f"Title: {documents[result.index]['title']}")
+                print(f"Text: {documents[result.index]['text']}")
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -3576,9 +3176,34 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const endpoint = await client.endpoints.retrieve("endpoint-id");
+            const documents = [{
+              "title": "Llama",
+              "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
+            },
+            {
+              "title": "Panda",
+              "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
+            },
+            {
+              "title": "Guanaco",
+              "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
+            },
+            {
+              "title": "Wild Bactrian camel",
+              "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
+            }];
 
-            console.log(endpoint);
+            const response = await client.rerank.create({
+              model: "Salesforce/Llama-Rank-v1",
+              query: "What animals can I find near Peru?",
+              documents,
+            });
+
+            for (const result of response.results) {
+              console.log(`Rank: ${result.index + 1}`);
+              console.log(`Title: ${documents[result.index].title}`);
+              console.log(`Text: ${documents[result.index].text}`);
+            }
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -3588,259 +3213,115 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const endpoint = await client.endpoints.retrieve("endpoint-id");
+            const documents = [{
+              "title": "Llama",
+              "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
+            },
+            {
+              "title": "Panda",
+              "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
+            },
+            {
+              "title": "Guanaco",
+              "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
+            },
+            {
+              "title": "Wild Bactrian camel",
+              "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
+            }];
 
-            console.log(endpoint);
+            const response = await client.rerank.create({
+              model: "Salesforce/Llama-Rank-v1",
+              query: "What animals can I find near Peru?",
+              documents,
+            });
+
+            for (const result of response.results) {
+              console.log(`Rank: ${result.index + 1}`);
+              console.log(`Title: ${documents[result.index].title}`);
+              console.log(`Text: ${documents[result.index].text}`);
+            }
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/endpoints/endpoint-id" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      operationId: getEndpoint
-      parameters:
-        - name: endpointId
-          in: path
-          required: true
-          schema:
-            type: string
-          description: The ID of the endpoint to retrieve
-          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
-      responses:
-        '200':
-          description: '200'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/DedicatedEndpoint'
-        '403':
-          description: 'Unauthorized'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '404':
-          description: 'Not Found'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '500':
-          description: 'Internal error'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-
-    patch:
-      tags: ['Endpoints']
-      summary: Update endpoint, this can also be used to start or stop a dedicated endpoint
-      description: Updates an existing endpoint's configuration. You can modify the display name, autoscaling settings, or change the endpoint's state (start/stop).
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            endpoint = client.endpoints.update(
-                endpoint_id="endpoint-id",
-                state="STOPPED"
-            )
-
-            print(endpoint)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const endpoint = await client.endpoints.update("endpoint-id", {
-              state: "STOPPED"
-            });
-
-            console.log(endpoint);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const endpoint = await client.endpoints.update("endpoint-id", {
-              state: "STOPPED"
-            });
-
-            console.log(endpoint);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl -X PATCH "https://api.together.xyz/v1/endpoints/endpoint-id" \
+            curl -X POST "https://api.together.xyz/v1/rerank" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json" \
                  -d '{
-                   "state": "STOPPED"
+                   "model": "Salesforce/Llama-Rank-v1",
+                   "query": "What animals can I find near Peru?",
+                   "documents": [{
+                      "title": "Llama",
+                      "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
+                    },
+                    {
+                      "title": "Panda",
+                      "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
+                    },
+                    {
+                      "title": "Guanaco",
+                      "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
+                    },
+                    {
+                      "title": "Wild Bactrian camel",
+                      "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
+                    }]
                  }'
-      operationId: updateEndpoint
-      parameters:
-        - name: endpointId
-          in: path
-          required: true
-          schema:
-            type: string
-          description: The ID of the endpoint to update
-          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
+      operationId: rerank
       requestBody:
-        required: true
         content:
           application/json:
             schema:
-              type: object
-              properties:
-                display_name:
-                  type: string
-                  description: A human-readable name for the endpoint
-                  example: My Llama3 70b endpoint
-                state:
-                  type: string
-                  description: The desired state of the endpoint
-                  enum:
-                    - STARTED
-                    - STOPPED
-                  example: STARTED
-                autoscaling:
-                  $ref: '#/components/schemas/Autoscaling'
-                  description: New autoscaling configuration for the endpoint
-                inactive_timeout:
-                  type: integer
-                  description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
-                  nullable: true
-                  example: 60
+              $ref: '#/components/schemas/RerankRequest'
       responses:
         '200':
           description: '200'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/DedicatedEndpoint'
-        '403':
-          description: 'Unauthorized'
+                $ref: '#/components/schemas/RerankResponse'
+        '400':
+          description: 'BadRequest'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-        '404':
-          description: 'Not Found'
+        '401':
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-        '500':
-          description: 'Internal error'
+        '404':
+          description: 'NotFound'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-
-    delete:
-      tags: ['Endpoints']
-      summary: Delete endpoint
-      description: Permanently deletes an endpoint. This action cannot be undone.
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            endpoint = client.endpoints.delete(
-                endpoint_id="endpoint-id",
-            )
-
-            print(endpoint)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const endpoint = await client.endpoints.delete("endpoint-id");
-
-            console.log(endpoint);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const endpoint = await client.endpoints.delete("endpoint-id");
-
-            console.log(endpoint);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl -X "DELETE" "https://api.together.xyz/v1/endpoints/endpoint-id" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY"
-      operationId: deleteEndpoint
-      parameters:
-        - name: endpointId
-          in: path
-          required: true
-          schema:
-            type: string
-          description: The ID of the endpoint to delete
-          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
-      responses:
-        '204':
-          description: 'No Content - Endpoint successfully deleted'
-        '403':
-          description: 'Unauthorized'
+        '429':
+          description: 'RateLimit'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-        '404':
-          description: 'Not Found'
+        '503':
+          description: 'Overloaded'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-        '500':
-          description: 'Internal error'
+        '504':
+          description: 'Timeout'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-
-  /hardware:
-    get:
-      tags: ['Hardware']
-      summary: List available hardware configurations
-      description: >
-        Returns a list of available hardware configurations for deploying models.
-        When a model parameter is provided, it returns only hardware configurations compatible
-        with that model, including their current availability status.
+      deprecated: false
+  /audio/speech:
+    post:
+      tags: ['Audio']
+      summary: Create audio generation request
+      description: Generate audio from input text
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -3853,10 +3334,13 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.endpoints.list_hardware()
+            response = client.audio.speech.create(
+                model="cartesia/sonic-2",
+                input="The quick brown fox jumps over the lazy dog.",
+                voice="laidback woman",
+            )
 
-            for hardware in response:
-                print(hardware.id)
+            response.stream_to_file("audio.wav")
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -3867,340 +3351,545 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.hardware.list()
+            response = client.audio.speech.with_streaming_response.create(
+                model="cartesia/sonic-2",
+                input="The quick brown fox jumps over the lazy dog.",
+                voice="laidback woman",
+            )
 
-            for hardware in response.data:
-                print(hardware.id)
+            with response as stream:
+              stream.stream_to_file("audio.wav")
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
             import Together from "together-ai";
+            import { createWriteStream } from "fs";
+            import { join } from "path";
+            import { pipeline } from "stream/promises";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const hardware = await client.hardware.list();
+            const response = await client.audio.speech.create({
+              model: "cartesia/sonic-2",
+              input: "The quick brown fox jumps over the lazy dog.",
+              voice: "laidback woman",
+            });
 
-            console.log(hardware);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
+            const filepath = join(process.cwd(), "audio.wav");
+            const writeStream = createWriteStream(filepath);
+
+            if (response.body) {
+              await pipeline(response.body, writeStream);
+            }
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
             import Together from "together-ai";
+            import { createWriteStream } from "fs";
+            import { join } from "path";
+            import { pipeline } from "stream/promises";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const hardware = await client.hardware.list();
+            const response = await client.audio.speech.create({
+              model: "cartesia/sonic-2",
+              input: "The quick brown fox jumps over the lazy dog.",
+              voice: "laidback woman",
+            });
 
-            console.log(hardware);
+            const filepath = join(process.cwd(), "audio.wav");
+            const writeStream = createWriteStream(filepath);
+
+            if (response.body) {
+              await pipeline(response.body, writeStream);
+            }
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/hardware" \
+            curl -X POST "https://api.together.xyz/v1/audio/speech" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      operationId: listHardware
-      parameters:
-        - name: model
-          in: query
-          required: false
-          schema:
-            type: string
-          description: >
-            Filter hardware configurations by model compatibility. When provided,
-            the response includes availability status for each compatible configuration.
-          example: meta-llama/Llama-3-70b-chat-hf
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "model": "cartesia/sonic-2",
+                   "input": "The quick brown fox jumps over the lazy dog.",
+                   "voice": "laidback woman"
+                 }' \
+                 --output audio.wav
+      operationId: audio-speech
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/AudioSpeechRequest'
       responses:
         '200':
-          description: 'List of available hardware configurations'
+          description: 'OK'
           content:
-            application/json:
+            application/octet-stream:
               schema:
-                type: object
-                required:
-                  - object
-                  - data
-                properties:
-                  object:
-                    type: string
-                    enum:
-                      - list
-                  data:
-                    type: array
-                    items:
-                      $ref: '#/components/schemas/HardwareWithStatus'
-        '403':
-          description: 'Unauthorized'
+                type: string
+                format: binary
+            audio/wav:
+              schema:
+                type: string
+                format: binary
+            audio/mpeg:
+              schema:
+                type: string
+                format: binary
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/AudioSpeechStreamResponse'
+        '400':
+          description: 'BadRequest'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-        '500':
-          description: 'Internal error'
+        '429':
+          description: 'RateLimit'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-  /tci/execute:
-    post:
-      tags: ['Code Interpreter']
-      callbacks: {}
+  /audio/speech/websocket:
+    get:
+      tags: ['Audio']
+      summary: Real-time text-to-speech via WebSocket
       description: |
-        Executes the given code snippet and returns the output. Without a session_id, a new session will be created to run the code. If you do pass in a valid session_id, the code will be run in that session. This is useful for running multiple code snippets in the same environment, because dependencies and similar things are persisted
-        between calls to the same session.
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
+        Establishes a WebSocket connection for real-time text-to-speech generation. This endpoint uses the WebSocket protocol (wss://api.together.ai/v1/audio/speech/websocket) for bidirectional streaming communication.
 
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
+        **Connection Setup:**
+        - Protocol: WebSocket (wss://)
+        - Authentication: Pass your API key as a Bearer token in the `Authorization` header
+        - Parameters: Sent as query parameters (model, voice, max_partial_length)
 
-            response = client.code_interpreter.run(
-                code="print('Hello world!')",
-                language="python",
-            )
+        **Client Events:**
+        - `tts_session.updated`: Update session parameters like voice
+          ```json
+          {
+            "type": "tts_session.updated",
+            "session": {
+              "voice": "tara"
+            }
+          }
+          ```
+        - `input_text_buffer.append`: Send text chunks for TTS generation
+          ```json
+          {
+            "type": "input_text_buffer.append",
+            "text": "Hello, this is a test."
+          }
+          ```
+        - `input_text_buffer.clear`: Clear the buffered text
+          ```json
+          {
+            "type": "input_text_buffer.clear"
+          }
+          ```
+        - `input_text_buffer.commit`: Signal end of text input and process remaining text
+          ```json
+          {
+            "type": "input_text_buffer.commit"
+          }
+          ```
 
-            print(response.data.outputs[0].data);
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
+        **Server Events:**
+        - `session.created`: Initial session confirmation (sent first)
+          ```json
+          {
+            "event_id": "evt_123456",
+            "type": "session.created",
+            "session": {
+              "id": "session-id",
+              "object": "realtime.tts.session",
+              "modalities": ["text", "audio"],
+              "model": "hexgrad/Kokoro-82M",
+              "voice": "tara"
+            }
+          }
+          ```
+        - `conversation.item.input_text.received`: Acknowledgment that text was received
+          ```json
+          {
+            "type": "conversation.item.input_text.received",
+            "text": "Hello, this is a test."
+          }
+          ```
+        - `conversation.item.audio_output.delta`: Audio chunks as base64-encoded data
+          ```json
+          {
+            "type": "conversation.item.audio_output.delta",
+            "item_id": "tts_1",
+            "delta": "<base64_encoded_audio_chunk>"
+          }
+          ```
+        - `conversation.item.audio_output.done`: Audio generation complete for an item
+          ```json
+          {
+            "type": "conversation.item.audio_output.done",
+            "item_id": "tts_1"
+          }
+          ```
+        - `conversation.item.tts.failed`: Error occurred
+          ```json
+          {
+            "type": "conversation.item.tts.failed",
+            "error": {
+              "message": "Error description",
+              "type": "invalid_request_error",
+              "param": null,
+              "code": "invalid_api_key"
+            }
+          }
+          ```
 
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
+        **Text Processing:**
+        - Partial text (no sentence ending) is held in the buffer until one of the following occurs:
+          - The server determines that the text is complete enough to be processed for TTS generation
+          - The partial text exceeds `max_partial_length` characters (default: 250)
+          - The `input_text_buffer.commit` event is received
 
-            response = client.code_interpreter.execute(
-                code="print('Hello world!')",
-                language="python",
-            )
+        **Audio Format:**
+        - Format: WAV (PCM s16le)
+        - Sample Rate: 24000 Hz
+        - Encoding: Base64
+        - Delivered via `conversation.item.audio_output.delta` events
 
-            print(response.data.outputs[0].data);
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
+        **Error Codes:**
+        - `invalid_api_key`: Invalid API key provided (401)
+        - `missing_api_key`: Authorization header missing (401)
+        - `model_not_available`: Invalid or unavailable model (400)
+        - Invalid text format errors (400)
+
+      operationId: realtime-tts
+      x-codeSamples:
+        - lang: Python
+          label: Python WebSocket Client
           source: |
-            import Together from "together-ai";
+            import asyncio
+            import websockets
+            import json
+            import base64
+            import os
 
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
+            async def generate_speech():
+                api_key = os.environ.get("TOGETHER_API_KEY")
+                url = "wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara"
 
-            const response = await client.codeInterpreter.execute({
-              code: "print('Hello world!')",
-              language: "python"
-            });
+                headers = {
+                    "Authorization": f"Bearer {api_key}"
+                }
 
-            console.log(response.data?.outputs?.[0]?.data);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
+                async with websockets.connect(url, additional_headers=headers) as ws:
+                    # Wait for session created
+                    session_msg = await ws.recv()
+                    session_data = json.loads(session_msg)
+                    print(f"Session created: {session_data['session']['id']}")
 
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
+                    # Send text for TTS
+                    text_chunks = [
+                        "Hello, this is a test.",
+                        "This is the second sentence.",
+                        "And this is the final one."
+                    ]
+
+                    async def send_text():
+                        for chunk in text_chunks:
+                            await ws.send(json.dumps({
+                                "type": "input_text_buffer.append",
+                                "text": chunk
+                            }))
+                            await asyncio.sleep(0.5)  # Simulate typing
+
+                        # Commit to process any remaining text
+                        await ws.send(json.dumps({
+                            "type": "input_text_buffer.commit"
+                        }))
+
+                    async def receive_audio():
+                        audio_data = bytearray()
+                        async for message in ws:
+                            data = json.loads(message)
+                            
+                            if data["type"] == "conversation.item.input_text.received":
+                                print(f"Text received: {data['text']}")
+                            elif data["type"] == "conversation.item.audio_output.delta":
+                                # Decode base64 audio chunk
+                                audio_chunk = base64.b64decode(data['delta'])
+                                audio_data.extend(audio_chunk)
+                                print(f"Received audio chunk for item {data['item_id']}")
+                            elif data["type"] == "conversation.item.audio_output.done":
+                                print(f"Audio generation complete for item {data['item_id']}")
+                            elif data["type"] == "conversation.item.tts.failed":
+                                error = data.get("error", {})
+                                print(f"Error: {error.get('message')}")
+                                break
+
+                        # Save the audio to a file
+                        with open("output.wav", "wb") as f:
+                            f.write(audio_data)
+                        print("Audio saved to output.wav")
+
+                    # Run send and receive concurrently
+                    await asyncio.gather(send_text(), receive_audio())
+
+            asyncio.run(generate_speech())
+        - lang: JavaScript
+          label: Node.js WebSocket Client
+          source: |
+            import WebSocket from 'ws';
+            import fs from 'fs';
+
+            const apiKey = process.env.TOGETHER_API_KEY;
+            const url = 'wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara';
+
+            const ws = new WebSocket(url, {
+              headers: {
+                'Authorization': `Bearer ${apiKey}`
+              }
             });
 
-            const response = await client.codeInterpreter.execute({
-              code: "print('Hello world!')",
-              language: "python"
+            const audioData = [];
+
+            ws.on('open', () => {
+              console.log('WebSocket connection established!');
             });
 
-            console.log(response.data?.outputs?.[0]?.data);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl -X POST "https://api.together.xyz/v1/tci/execute" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "code": "print(\'Hello world!\')",
-                   "language": "python"
-                 }'
-      operationId: tci/execute
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/ExecuteRequest'
-        description: Execute Request
-        required: false
+            ws.on('message', (data) => {
+              const message = JSON.parse(data.toString());
+
+              if (message.type === 'session.created') {
+                console.log(`Session created: ${message.session.id}`);
+                
+                // Send text chunks
+                const textChunks = [
+                  "Hello, this is a test.",
+                  "This is the second sentence.",
+                  "And this is the final one."
+                ];
+
+                textChunks.forEach((text, index) => {
+                  setTimeout(() => {
+                    ws.send(JSON.stringify({
+                      type: 'input_text_buffer.append',
+                      text: text
+                    }));
+                  }, index * 500);
+                });
+
+                // Commit after all chunks
+                setTimeout(() => {
+                  ws.send(JSON.stringify({
+                    type: 'input_text_buffer.commit'
+                  }));
+                }, textChunks.length * 500 + 100);
+
+              } else if (message.type === 'conversation.item.input_text.received') {
+                console.log(`Text received: ${message.text}`);
+              } else if (message.type === 'conversation.item.audio_output.delta') {
+                // Decode base64 audio chunk
+                const audioChunk = Buffer.from(message.delta, 'base64');
+                audioData.push(audioChunk);
+                console.log(`Received audio chunk for item ${message.item_id}`);
+              } else if (message.type === 'conversation.item.audio_output.done') {
+                console.log(`Audio generation complete for item ${message.item_id}`);
+              } else if (message.type === 'conversation.item.tts.failed') {
+                const errorMessage = message.error?.message ?? 'Unknown error';
+                console.error(`Error: ${errorMessage}`);
+                ws.close();
+              }
+            });
+
+            ws.on('close', () => {
+              // Save the audio to a file
+              if (audioData.length > 0) {
+                const completeAudio = Buffer.concat(audioData);
+                fs.writeFileSync('output.wav', completeAudio);
+                console.log('Audio saved to output.wav');
+              }
+            });
+
+            ws.on('error', (error) => {
+              console.error('WebSocket error:', error);
+            });
+      parameters:
+        - in: query
+          name: model
+          required: false
+          schema:
+            type: string
+            enum:
+              - hexgrad/Kokoro-82M
+              - cartesia/sonic-english
+            default: hexgrad/Kokoro-82M
+          description: The TTS model to use for speech generation. Can also be set via `tts_session.updated` event.
+        - in: query
+          name: voice
+          required: false
+          schema:
+            type: string
+            default: tara
+          description: |
+            The voice to use for speech generation. Default is 'tara'.
+            Available voices vary by model. Can also be updated via `tts_session.updated` event.
+        - in: query
+          name: max_partial_length
+          required: false
+          schema:
+            type: integer
+            default: 250
+          description: |
+            Maximum number of characters in partial text before forcing TTS generation
+            even without a sentence ending. Helps reduce latency for long text without punctuation.
       responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ExecuteResponse'
-          description: Execute Response
-  /tci/sessions:
-    get:
-      tags: ['Code Interpreter']
-      callbacks: {}
-      description: |
-        Lists all your currently active sessions.
+        '101':
+          description: |
+            Switching Protocols - WebSocket connection established successfully.
+
+            Error message format:
+            ```json
+            {
+              "type": "conversation.item.tts.failed",
+              "error": {
+                "message": "Error description",
+                "type": "invalid_request_error",
+                "param": null,
+                "code": "error_code"
+              }
+            }
+            ```
+  /audio/transcriptions:
+    post:
+      tags: ['Audio']
+      summary: Create audio transcription request
+      description: Transcribes audio into text
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
           source: |
             # Docs for v2 can be found by changing the above selector ^
-            # together v1 does not support this method
+            from together import Together
+            import os
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            file = open("audio.wav", "rb")
+
+            response = client.audio.transcriptions.create(
+                model="openai/whisper-large-v3",
+                file=file,
+            )
+
+            print(response.text)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
             from together import Together
-            import os
 
             client = Together(
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.code_interpreter.sessions.list()
+            file = open("audio.wav", "rb")
 
-            for session in response.data.sessions:
-                print(session.id)
+            response = client.audio.transcriptions.create(
+                model="openai/whisper-large-v3",
+                file=file,
+            )
+
+            print(response.text)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
             import Together from "together-ai";
+            import { readFileSync } from "fs";
+            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.codeInterpreter.sessions.list();
+            const audioFilePath = join(process.cwd(), "audio.wav");
+            const audioBuffer = readFileSync(audioFilePath);
+            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
 
-            for (const session of response.data?.sessions) {
-              console.log(session.id);
-            }
+            const response = await client.audio.transcriptions.create({
+              model: "openai/whisper-large-v3",
+              file: audioFile,
+            });
+
+            console.log(response.text);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
             import Together from "together-ai";
+            import { readFileSync } from "fs";
+            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.codeInterpreter.sessions.list();
+            const audioFilePath = join(process.cwd(), "audio.wav");
+            const audioBuffer = readFileSync(audioFilePath);
+            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
 
-            for (const session of response.data?.sessions) {
-              console.log(session.id);
-            }
+            const response = await client.audio.transcriptions.create({
+              model: "openai/whisper-large-v3",
+              file: audioFile,
+            });
+
+            console.log(response.text);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/tci/sessions" \
+            curl -X POST "https://api.together.xyz/v1/audio/transcriptions" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      operationId: sessions/list
-      parameters: []
+                 -F "file=@audio.wav" \
+                 -F "model=openai/whisper-large-v3"
+      operationId: audio-transcriptions
+      requestBody:
+        required: true
+        content:
+          multipart/form-data:
+            schema:
+              $ref: '#/components/schemas/AudioTranscriptionRequest'
       responses:
         '200':
+          description: 'OK'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/SessionListResponse'
-          description: List Response
-  /batches:
-    get:
-      tags: ['Batches']
-      summary: List batch jobs
-      description: List all batch jobs for the authenticated user
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            batches = client.batches.list_batches()
-
-            for batch in batches:
-                print(batch.id)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            batches = client.batches.list()
-
-            for batch in batches:
-                print(batch.id)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const batches = await client.batches.list();
-
-            console.log(batches);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const batches = await client.batches.list();
-
-            console.log(batches);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl "https://api.together.xyz/v1/batches" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      security:
-        - bearerAuth: []
-      responses:
-        '200':
-          description: OK
+                $ref: '#/components/schemas/AudioTranscriptionResponse'
+        '400':
+          description: 'BadRequest'
           content:
             application/json:
               schema:
-                type: array
-                items:
-                  $ref: '#/components/schemas/BatchJob'
+                $ref: '#/components/schemas/ErrorData'
         '401':
-          description: Unauthorized
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '500':
-          description: Internal Server Error
+                $ref: '#/components/schemas/ErrorData'
+        '429':
+          description: 'RateLimit'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
+                $ref: '#/components/schemas/ErrorData'
+  /audio/translations:
     post:
-      tags: ['Batches']
-      summary: Create a batch job
-      description: Create a new batch job with the given input file and endpoint
+      tags: ['Audio']
+      summary: Create audio translation request
+      description: Translates audio into English
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -4213,9 +3902,15 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            batch = client.batches.create_batch("file_id", endpoint="/v1/chat/completions")
+            file = open("audio.wav", "rb")
 
-            print(batch.id)
+            response = client.audio.translations.create(
+                model="openai/whisper-large-v3",
+                file=file,
+                language="es",
+            )
+
+            print(response.text)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -4226,109 +3921,113 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            batch = client.batches.create(input_file_id="file_id", endpoint="/v1/chat/completions")
+            file = open("audio.wav", "rb")
 
-            print(batch.job)
+            response = client.audio.translations.create(
+                model="openai/whisper-large-v3",
+                file=file,
+                language="es",
+            )
+
+            print(response.text)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
             import Together from "together-ai";
+            import { readFileSync } from "fs";
+            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const batch = await client.batches.create({
-              endpoint: "/v1/chat/completions",
-              input_file_id: "file-id",
+            const audioFilePath = join(process.cwd(), "audio.wav");
+            const audioBuffer = readFileSync(audioFilePath);
+            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
+
+            const response = await client.audio.translations.create({
+              model: "openai/whisper-large-v3",
+              file: audioFile,
+              language: "es"
             });
 
-            console.log(batch);
+            console.log(response.text);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
             import Together from "together-ai";
+            import { readFileSync } from "fs";
+            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const batch = await client.batches.create({
-              endpoint: "/v1/chat/completions",
-              input_file_id: "file-id",
+            const audioFilePath = join(process.cwd(), "audio.wav");
+            const audioBuffer = readFileSync(audioFilePath);
+            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
+
+            const response = await client.audio.translations.create({
+              model: "openai/whisper-large-v3",
+              file: audioFile,
+              language: "es"
             });
 
-            console.log(batch);
+            console.log(response.text);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/batches" \
+            curl -X POST "https://api.together.xyz/v1/audio/translations" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "endpoint": "/v1/chat/completions",
-                   "input_file_id": "file-id"
-                 }'
-      security:
-        - bearerAuth: []
+                 -F "file=@audio.wav" \
+                 -F "model=openai/whisper-large-v3" \
+                 -F "language=es"
+      operationId: audio-translations
       requestBody:
         required: true
         content:
-          application/json:
+          multipart/form-data:
             schema:
-              $ref: '#/components/schemas/CreateBatchRequest'
+              $ref: '#/components/schemas/AudioTranslationRequest'
       responses:
-        '201':
-          description: Job created (potentially with warnings)
+        '200':
+          description: 'OK'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchJobWithWarning'
+                $ref: '#/components/schemas/AudioTranslationResponse'
         '400':
-          description: Bad Request
+          description: 'BadRequest'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
+                $ref: '#/components/schemas/ErrorData'
         '401':
-          description: Unauthorized
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
+                $ref: '#/components/schemas/ErrorData'
         '429':
-          description: Too Many Requests
+          description: 'RateLimit'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '500':
-          description: Internal Server Error
+                $ref: '#/components/schemas/ErrorData'
+  /clusters/availability-zones:
+    get:
+      tags: ['Endpoints']
+      summary: List all available availability zones.
+      description: List all available availability zones.
+      operationId: availabilityZones
+      responses:
+        '200':
+          description: Success
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-
-  /batches/{id}:
-    get:
-      tags: ['Batches']
-      summary: Get a batch job
-      description: Get details of a batch job by ID
+                $ref: '#/components/schemas/ListAvailibilityZonesResponse'
       x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            batch = client.batches.get_batch("batch_id")
-
-            print(batch)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -4339,9 +4038,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            batch = client.batches.retrieve("batch_id")
+            response = client.endpoints.list_avzones()
 
-            print(batch)
+            print(response.avzones)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -4351,9 +4050,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const batch = await client.batches.retrieve("batch-id");
+            const response = await client.endpoints.listAvzones();
 
-            console.log(batch);
+            console.log(response.avzones);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -4363,67 +4062,20 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const batch = await client.batches.retrieve("batch-id");
+            const response = await client.endpoints.listAvzones();
 
-            console.log(batch);
+            console.log(response.avzones);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/batches/ID" \
+            curl "https://api.together.xyz/v1/clusters/availability-zones" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json"
-      security:
-        - bearerAuth: []
-      parameters:
-        - name: id
-          in: path
-          required: true
-          description: Job ID
-          schema:
-            type: string
-          example: 'batch_job_abc123def456'
-      responses:
-        '200':
-          description: OK
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchJob'
-        '400':
-          description: Bad Request
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '401':
-          description: Unauthorized
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '403':
-          description: Forbidden
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '404':
-          description: Not Found
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '500':
-          description: Internal Server Error
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-  /batches/{id}/cancel:
-    post:
-      tags: ['Batches']
-      summary: Cancel a batch job
-      description: Cancel a batch job by ID
+  /endpoints:
+    get:
+      tags: ['Endpoints']
+      summary: List all endpoints, can be filtered by type
+      description: Returns a list of all endpoints associated with your account. You can filter the results by type (dedicated or serverless).
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -4436,9 +4088,10 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            batch = client.batches.cancel("batch_id")
+            endpoints = client.endpoints.list()
 
-            print(batch)
+            for endpoint in endpoints:
+                print(endpoint.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -4449,9 +4102,10 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            batch = client.batches.cancel("batch_id")
+            response = client.endpoints.list()
 
-            print(batch)
+            for endpoint in response.data:
+                print(endpoint.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -4461,9 +4115,11 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const batch = await client.batches.cancel("batch-id");
+            const endpoints = await client.endpoints.list();
 
-            console.log(batch);
+            for (const endpoint of endpoints.data) {
+              console.log(endpoint);
+            }
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -4473,68 +4129,91 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const batch = await client.batches.cancel("batch-id");
+            const endpoints = await client.endpoints.list();
 
-            console.log(batch);
+            for (const endpoint of endpoints.data) {
+              console.log(endpoint);
+            }
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/batches/ID/cancel" \
+            curl "https://api.together.xyz/v1/endpoints" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json"
-      security:
-        - bearerAuth: []
+      operationId: listEndpoints
       parameters:
-        - name: id
-          in: path
-          required: true
-          description: Job ID
+        - name: type
+          in: query
+          required: false
           schema:
             type: string
-          example: 'batch_job_abc123def456'
+            enum:
+              - dedicated
+              - serverless
+          description: Filter endpoints by type
+          example: dedicated
+        - name: usage_type
+          in: query
+          required: false
+          schema:
+            type: string
+            enum:
+              - on-demand
+              - reserved
+          description: Filter endpoints by usage type
+          example: on-demand
+        - name: mine
+          in: query
+          required: false
+          schema:
+            type: boolean
+          description: If true, return only endpoints owned by the caller
       responses:
         '200':
-          description: OK
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchJob'
-        '400':
-          description: Bad Request
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '401':
-          description: Unauthorized
+          description: '200'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
+                type: object
+                required:
+                  - object
+                  - data
+                properties:
+                  object:
+                    type: string
+                    enum:
+                      - list
+                  data:
+                    type: array
+                    items:
+                      $ref: '#/components/schemas/ListEndpoint'
+                example:
+                  object: 'list'
+                  data:
+                    - object: 'endpoint'
+                      id: 'endpoint-5c0c20db-62fe-4f41-8ffc-d9e4ea1a264e'
+                      name: 'allenai/OLMo-7B'
+                      model: 'allenai/OLMo-7B'
+                      type: 'serverless'
+                      owner: 'together'
+                      state: 'STARTED'
+                      created_at: '2024-02-28T21:34:35.444Z'
         '403':
-          description: Forbidden
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '404':
-          description: Not Found
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
+                $ref: '#/components/schemas/ErrorData'
         '500':
-          description: Internal Server Error
+          description: 'Internal error'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-  /evaluation:
+                $ref: '#/components/schemas/ErrorData'
     post:
-      tags:
-        - evaluation
-      summary: Create an evaluation job
-      operationId: createEvaluationJob
+      tags: ['Endpoints']
+      summary: Create a dedicated endpoint, it will start automatically
+      description: Creates a new dedicated endpoint for serving models. The endpoint will automatically start after creation. You can deploy any supported model on hardware configurations that meet the model's requirements.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -4547,17 +4226,14 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.evaluation.create(
-                type="classify",
-                judge_model_name="meta-llama/Llama-3.1-70B-Instruct-Turbo",
-                judge_system_template="You are an expert evaluator...",
-                input_data_file_path="file-abc123",
-                labels=["good", "bad"],
-                pass_labels=["good"],
-                model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo"
+            endpoint = client.endpoints.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                hardware="1x_nvidia_a100_80gb_sxm",
+                min_replicas=2,
+                max_replicas=5,
             )
 
-            print(response.workflow_id)
+            print(endpoint.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -4568,22 +4244,16 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.evals.create(
-                type="classify",
-                parameters=ParametersEvaluationClassifyParameters(
-                    judge=ParametersEvaluationClassifyParametersJudge(
-                        model="meta-llama/Llama-3.1-70B-Instruct-Turbo",
-                        model_source="serverless",
-                        system_template="You are an expert evaluator...",
-                    ),
-                    input_data_file_path="file-abc123",
-                    labels=["good", "bad"],
-                    pass_labels=["good"],
-                    model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo"
-                )
+            endpoint = client.endpoints.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                hardware="1x_nvidia_a100_80gb_sxm",
+                autoscaling={
+                  "min_replicas": 2,
+                  "max_replicas": 5,
+                }
             )
 
-            print(response.workflow_id)
+            print(endpoint.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -4593,22 +4263,16 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.create({
-              type: 'classify',
-              parameters: {
-                judge: {
-                  model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo',
-                  model_source: 'serverless',
-                  system_template: 'You are an expert evaluator...',
-                },
-                input_data_file_path: 'file-abc123',
-                labels: ['good', 'bad'],
-                pass_labels: ['good'],
-                model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo',
-              },
+            const endpoint = await client.endpoints.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+              hardware: "1x_nvidia_a100_80gb_sxm",
+              autoscaling: {
+                max_replicas: 5,
+                min_replicas: 2,
+              }
             });
 
-            console.log(response.workflow_id);
+            console.log(endpoint.id);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -4618,54 +4282,62 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.create({
-              type: 'classify',
-              parameters: {
-                judge: {
-                  model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo',
-                  model_source: 'serverless',
-                  system_template: 'You are an expert evaluator...',
-                },
-                input_data_file_path: 'file-abc123',
-                labels: ['good', 'bad'],
-                pass_labels: ['good'],
-                model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo',
-              },
+            const endpoint = await client.endpoints.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+              hardware: "1x_nvidia_a100_80gb_sxm",
+              autoscaling: {
+                max_replicas: 5,
+                min_replicas: 2,
+              }
             });
 
-            console.log(response.workflow_id);
-
-
+            console.log(endpoint.id);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X POST "https://api.together.xyz/v1/endpoints" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                   "hardware": "1x_nvidia_a100_80gb_sxm",
+                   "autoscaling": {
+                     "max_replicas": 5,
+                     "min_replicas": 2
+                   }
+                 }'
+      operationId: createEndpoint
       requestBody:
         required: true
         content:
           application/json:
             schema:
-              $ref: "#/components/schemas/EvaluationTypedRequest"
+              $ref: '#/components/schemas/CreateEndpointRequest'
       responses:
-        "200":
-          description: "Evaluation job created successfully"
+        '200':
+          description: '200'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/EvaluationResponse"
-        "400":
-          description: "Invalid request format"
+                $ref: '#/components/schemas/DedicatedEndpoint'
+        '403':
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
-        "500":
-          description: "Failed to create evaluation job"
+                $ref: '#/components/schemas/ErrorData'
+        '500':
+          description: 'Internal error'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
+                $ref: '#/components/schemas/ErrorData'
+
+  /endpoints/{endpointId}:
     get:
-      tags:
-        - evaluation
-      summary: Get all evaluation jobs
-      operationId: getAllEvaluationJobs
+      tags: ['Endpoints']
+      summary: Get endpoint by ID
+      description: Retrieves details about a specific endpoint, including its current state, configuration, and scaling settings.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -4678,10 +4350,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            jobs = client.evaluation.list()
+            endpoint = client.endpoints.get("endpoint-id")
 
-            for job in jobs:
-                print(job.workflow_id)
+            print(endpoint.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -4692,10 +4363,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.evals.list()
+            endpoint = client.endpoints.retrieve("endpoint-id")
 
-            for job in response:
-                print(job.workflow_id)
+            print(endpoint.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -4705,11 +4375,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.list();
+            const endpoint = await client.endpoints.retrieve("endpoint-id");
 
-            for (const job of response) {
-              console.log(job.workflow_id);
-            }
+            console.log(endpoint);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -4719,95 +4387,54 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.list();
+            const endpoint = await client.endpoints.retrieve("endpoint-id");
 
-            for (const job of response) {
-              console.log(job.workflow_id);
-            }
+            console.log(endpoint);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl "https://api.together.xyz/v1/endpoints/endpoint-id" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+      operationId: getEndpoint
       parameters:
-        - name: status
-          in: query
-          required: false
-          schema:
-            type: string
-            default: "pending"
-        - name: limit
-          in: query
-          required: false
-          schema:
-            type: integer
-            default: 10
-        - name: userId
-          in: query
-          required: false
-          description: "Admin users can specify a user ID to filter jobs. Pass empty string to get all jobs."
+        - name: endpointId
+          in: path
+          required: true
           schema:
             type: string
+          description: The ID of the endpoint to retrieve
+          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
       responses:
-        "200":
-          description: "evaluation jobs retrieved successfully"
-          content:
-            application/json:
-              schema:
-                type: array
-                items:
-                  $ref: "#/components/schemas/EvaluationJob"
-        "400":
-          description: "Invalid request format"
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ErrorData"
-        "500":
-          description: "Error retrieving jobs from manager"
+        '200':
+          description: 'Endpoint details retrieved successfully'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
-  /evaluation/model-list:
-    get:
-      tags:
-        - evaluation
-      summary: Get model list
-      operationId: getModelList
-      parameters:
-        - name: model_source
-          in: query
-          required: false
-          schema:
-            type: string
-            default: "all"
-      responses:
-        "200":
-          description: "Model list retrieved successfully"
+                $ref: '#/components/schemas/DedicatedEndpoint'
+        '403':
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
-                type: object
-                properties:
-                  model_list:
-                    type: array
-                    items:
-                      type: string
-                      description: "The name of the model"
-        "400":
-          description: "Invalid request format"
+                $ref: '#/components/schemas/ErrorData'
+        '404':
+          description: 'Not Found'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
-        "500":
-          description: "Error retrieving model list"
+                $ref: '#/components/schemas/ErrorData'
+        '500':
+          description: 'Internal error'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
-  /evaluation/{id}:
-    get:
-      tags:
-        - evaluation
-      summary: Get evaluation job details
-      operationId: getEvaluationJobDetails
+                $ref: '#/components/schemas/ErrorData'
+
+    patch:
+      tags: ['Endpoints']
+      summary: Update endpoint; this can also be used to start or stop a dedicated endpoint
+      description: Updates an existing endpoint's configuration. You can modify the display name, autoscaling settings, or change the endpoint's state (start/stop).
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -4820,22 +4447,12 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.evaluation.retrieve('eval_id')
-
-            print(response)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
+            endpoint = client.endpoints.update(
+                endpoint_id="endpoint-id",
+                state="STOPPED"
             )
 
-            response = client.evals.retrieve('eval_id')
-
-            print(response)
+            print(endpoint)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -4845,9 +4462,11 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.retrieve('eval_id');
+            const endpoint = await client.endpoints.update("endpoint-id", {
+              state: "STOPPED"
+            });
 
-            console.log(response);
+            console.log(endpoint);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -4857,41 +4476,85 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.retrieve('eval_id');
+            const endpoint = await client.endpoints.update("endpoint-id", {
+              state: "STOPPED"
+            });
 
-            console.log(response);
+            console.log(endpoint);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X PATCH "https://api.together.xyz/v1/endpoints/endpoint-id" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "state": "STOPPED"
+                 }'
+      operationId: updateEndpoint
       parameters:
-        - name: id
+        - name: endpointId
           in: path
           required: true
           schema:
             type: string
+          description: The ID of the endpoint to update
+          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                display_name:
+                  type: string
+                  description: A human-readable name for the endpoint
+                  example: My Llama3 70b endpoint
+                state:
+                  type: string
+                  description: The desired state of the endpoint
+                  enum:
+                    - STARTED
+                    - STOPPED
+                  example: STARTED
+                autoscaling:
+                  description: New autoscaling configuration for the endpoint
+                  allOf: [{ $ref: '#/components/schemas/Autoscaling' }]
+                inactive_timeout:
+                  type: integer
+                  description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
+                  nullable: true
+                  example: 60
       responses:
-        "200":
-          description: "Evaluation job details retrieved successfully"
+        '200':
+          description: 'Endpoint updated successfully'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/EvaluationJob"
-        "404":
-          description: "Evaluation job not found"
+                $ref: '#/components/schemas/DedicatedEndpoint'
+        '403':
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
-        "500":
-          description: "Failed to get evaluation job"
+                $ref: '#/components/schemas/ErrorData'
+        '404':
+          description: 'Not Found'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
-
-  /evaluation/{id}/status:
-    get:
-      tags:
-        - evaluation
-      summary: Get evaluation job status and results
-      operationId: getEvaluationJobStatusAndResults
+                $ref: '#/components/schemas/ErrorData'
+        '500':
+          description: 'Internal error'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+
+    delete:
+      tags: ['Endpoints']
+      summary: Delete endpoint
+      description: Permanently deletes an endpoint. This action cannot be undone.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -4904,24 +4567,11 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.evaluation.status('eval_id')
-
-            print(response.status)
-            print(response.results)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
+            endpoint = client.endpoints.delete(
+                endpoint_id="endpoint-id",
             )
 
-            response = client.evals.status('eval_id')
-
-            print(response.status)
-            print(response.results)
+            print(endpoint)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -4931,10 +4581,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.status('eval_id');
+            const endpoint = await client.endpoints.delete("endpoint-id");
 
-            console.log(response.status);
-            console.log(response.results);
+            console.log(endpoint);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -4944,289 +4593,2300 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.status('eval_id');
+            const endpoint = await client.endpoints.delete("endpoint-id");
 
-            console.log(response.status);
-            console.log(response.results);
+            console.log(endpoint);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X "DELETE" "https://api.together.xyz/v1/endpoints/endpoint-id" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY"
+      operationId: deleteEndpoint
       parameters:
-        - name: id
+        - name: endpointId
           in: path
           required: true
           schema:
             type: string
+          description: The ID of the endpoint to delete
+          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
       responses:
-        "200":
-          description: "Evaluation job status and results retrieved successfully"
+        '204':
+          description: 'No Content - Endpoint successfully deleted'
+        '403':
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
-                type: object
-                properties:
-                  status:
-                    type: string
-                    description: "The status of the evaluation job"
-                    enum: ["completed", "error", "user_error", "running", "queued", "pending"]
-                  results:
-                    description: "The results of the evaluation job"
-                    oneOf:
-                      - $ref: "#/components/schemas/EvaluationClassifyResults"
-                      - $ref: "#/components/schemas/EvaluationScoreResults"
-                      - $ref: "#/components/schemas/EvaluationCompareResults"
-        "404":
-          description: "Evaluation job not found"
+                $ref: '#/components/schemas/ErrorData'
+        '404':
+          description: 'Not Found'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
-        "500":
-          description: "Failed to get evaluation job"
+                $ref: '#/components/schemas/ErrorData'
+        '500':
+          description: 'Internal error'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
+                $ref: '#/components/schemas/ErrorData'
 
-  /realtime:
+  /hardware:
     get:
-      tags: ['Audio']
-      summary: Real-time audio transcription via WebSocket
-      description: |
-        Establishes a WebSocket connection for real-time audio transcription. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/realtime) for bidirectional streaming communication.
-
-        **Connection Setup:**
-        - Protocol: WebSocket (wss://)
-        - Authentication: Pass API key as Bearer token in Authorization header
-        - Parameters: Sent as query parameters (model, input_audio_format)
-
-        **Client Events:**
-        - `input_audio_buffer.append`: Send audio chunks as base64-encoded data
-          ```json
-          {
-            "type": "input_audio_buffer.append",
-            "audio": "<base64_encoded_audio_chunk>"
-          }
-          ```
-        - `input_audio_buffer.commit`: Signal end of audio stream
-          ```json
-          {
-            "type": "input_audio_buffer.commit"
-          }
-          ```
-
-        **Server Events:**
-        - `session.created`: Initial session confirmation (sent first)
-          ```json
-          {
-            "type": "session.created",
-            "session": {
-              "id": "session-id",
-              "object": "realtime.session",
-              "modalities": ["audio"],
-              "model": "openai/whisper-large-v3"
-            }
-          }
-          ```
-        - `conversation.item.input_audio_transcription.delta`: Partial transcription results
-          ```json
-          {
-            "type": "conversation.item.input_audio_transcription.delta",
-            "delta": "The quick brown"
-          }
-          ```
-        - `conversation.item.input_audio_transcription.completed`: Final transcription
-          ```json
-          {
-            "type": "conversation.item.input_audio_transcription.completed",
-            "transcript": "The quick brown fox jumps over the lazy dog"
-          }
-          ```
-        - `conversation.item.input_audio_transcription.failed`: Error occurred
-          ```json
-          {
-            "type": "conversation.item.input_audio_transcription.failed",
-            "error": {
-              "message": "Error description",
-              "type": "invalid_request_error",
-              "param": null,
-              "code": "invalid_api_key"
-            }
-          }
-          ```
-
-        **Error Codes:**
-        - `invalid_api_key`: Invalid API key provided (401)
-        - `missing_api_key`: Authorization header missing (401)
-        - `model_not_available`: Invalid or unavailable model (400)
-        - Unsupported audio format errors (400)
-
-      operationId: realtime-transcription
+      tags: ['Hardware']
+      summary: List available hardware configurations
+      description: >
+        Returns a list of available hardware configurations for deploying models.
+        When a model parameter is provided, it returns only hardware configurations compatible
+        with that model, including their current availability status.
       x-codeSamples:
         - lang: Python
-          label: Python WebSocket Client
+          label: Together AI SDK (v1)
           source: |
-            import asyncio
-            import websockets
-            import json
-            import base64
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
             import os
 
-            async def transcribe_audio():
-                api_key = os.environ.get("TOGETHER_API_KEY")
-                url = "wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000"
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
 
-                headers = {
-                    "Authorization": f"Bearer {api_key}"
-                }
+            response = client.endpoints.list_hardware()
 
-                async with websockets.connect(url, additional_headers=headers) as ws:
-                    # Read audio file
-                    with open("audio.wav", "rb") as f:
-                        audio_data = f.read()
+            for hardware in response:
+                print(hardware.id)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
 
-                    # Send audio in chunks with delay to simulate real-time
-                    chunk_size = 8192
-                    bytes_per_second = 16000 * 2  # 16kHz * 2 bytes (16-bit)
-                    delay_per_chunk = chunk_size / bytes_per_second
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
 
-                    for i in range(0, len(audio_data), chunk_size):
-                        chunk = audio_data[i:i+chunk_size]
-                        base64_chunk = base64.b64encode(chunk).decode('utf-8')
-                        await ws.send(json.dumps({
-                            "type": "input_audio_buffer.append",
-                            "audio": base64_chunk
-                        }))
-                        # Simulate real-time streaming
-                        if i + chunk_size < len(audio_data):
-                            await asyncio.sleep(delay_per_chunk)
+            response = client.hardware.list()
 
-                    # Commit the audio buffer
-                    await ws.send(json.dumps({
-                        "type": "input_audio_buffer.commit"
-                    }))
+            for hardware in response.data:
+                print(hardware.id)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
 
-                    # Receive transcription results
-                    async for message in ws:
-                        data = json.loads(message)
-                        if data["type"] == "conversation.item.input_audio_transcription.delta":
-                            print(f"Partial: {data['delta']}")
-                        elif data["type"] == "conversation.item.input_audio_transcription.completed":
-                            print(f"Final: {data['transcript']}")
-                            break
-                        elif data["type"] == "conversation.item.input_audio_transcription.failed":
-                            error = data.get("error", {})
-                            print(f"Error: {error.get('message')}")
-                            break
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
 
-            asyncio.run(transcribe_audio())
+            const hardware = await client.hardware.list();
+
+            console.log(hardware);
         - lang: JavaScript
-          label: Node.js WebSocket Client
+          label: Together AI SDK (JavaScript)
           source: |
-            import WebSocket from 'ws';
-            import fs from 'fs';
-
-            const apiKey = process.env.TOGETHER_API_KEY;
-            const url = 'wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000';
+            import Together from "together-ai";
 
-            const ws = new WebSocket(url, {
-              headers: {
-                'Authorization': `Bearer ${apiKey}`
-              }
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            ws.on('open', async () => {
-              console.log('WebSocket connection established!');
+            const hardware = await client.hardware.list();
 
-              // Read audio file
-              const audioData = fs.readFileSync('audio.wav');
+            console.log(hardware);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl "https://api.together.xyz/v1/hardware" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+      operationId: listHardware
+      parameters:
+        - name: model
+          in: query
+          required: false
+          schema:
+            type: string
+          description: >
+            Filter hardware configurations by model compatibility. When provided,
+            the response includes availability status for each compatible configuration.
+          example: meta-llama/Llama-3-70b-chat-hf
+      responses:
+        '200':
+          description: 'List of available hardware configurations'
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - object
+                  - data
+                properties:
+                  object:
+                    type: string
+                    enum:
+                      - list
+                  data:
+                    type: array
+                    items:
+                      $ref: '#/components/schemas/HardwareWithStatus'
+        '403':
+          description: 'Unauthorized'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '500':
+          description: 'Internal error'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+  /tci/execute:
+    post:
+      tags: ['Code Interpreter']
+      callbacks: {}
+      description: |
+        Executes the given code snippet and returns the output. Without a session_id, a new session will be created to run the code. If you do pass in a valid session_id, the code will be run in that session. This is useful for running multiple code snippets in the same environment, because dependencies and similar things are persisted
+        between calls to the same session.
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
 
-              // Send audio in chunks with delay to simulate real-time
-              const chunkSize = 8192;
-              const bytesPerSecond = 16000 * 2;  // 16kHz * 2 bytes (16-bit)
-              const delayPerChunk = (chunkSize / bytesPerSecond) * 1000;  // Convert to ms
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
 
-              for (let i = 0; i < audioData.length; i += chunkSize) {
-                const chunk = audioData.slice(i, i + chunkSize);
-                const base64Chunk = chunk.toString('base64');
-                ws.send(JSON.stringify({
-                  type: 'input_audio_buffer.append',
-                  audio: base64Chunk
-                }));
+            response = client.code_interpreter.run(
+                code="print('Hello world!')",
+                language="python",
+            )
 
-                // Simulate real-time streaming
-                if (i + chunkSize < audioData.length) {
-                  await new Promise(resolve => setTimeout(resolve, delayPerChunk));
-                }
-              }
+            print(response.data.outputs[0].data)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
 
-              // Commit audio buffer
-              ws.send(JSON.stringify({
-                type: 'input_audio_buffer.commit'
-              }));
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.code_interpreter.execute(
+                code="print('Hello world!')",
+                language="python",
+            )
+
+            print(response.data.outputs[0].data)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            ws.on('message', (data) => {
-              const message = JSON.parse(data.toString());
+            const response = await client.codeInterpreter.execute({
+              code: "print('Hello world!')",
+              language: "python"
+            });
 
-              if (message.type === 'conversation.item.input_audio_transcription.delta') {
-                console.log(`Partial: ${message.delta}`);
-              } else if (message.type === 'conversation.item.input_audio_transcription.completed') {
-                console.log(`Final: ${message.transcript}`);
-                ws.close();
-              } else if (message.type === 'conversation.item.input_audio_transcription.failed') {
-                const errorMessage = message.error?.message ?? message.message ?? 'Unknown error';
-                console.error(`Error: ${errorMessage}`);
-                ws.close();
-              }
+            console.log(response.data?.outputs?.[0]?.data);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            ws.on('error', (error) => {
-              console.error('WebSocket error:', error);
+            const response = await client.codeInterpreter.execute({
+              code: "print('Hello world!')",
+              language: "python"
             });
-      parameters:
-        - in: query
-          name: model
-          required: true
-          schema:
-            type: string
-            enum:
-              - openai/whisper-large-v3
-            default: openai/whisper-large-v3
-          description: The Whisper model to use for transcription
-        - in: query
-          name: input_audio_format
-          required: true
-          schema:
-            type: string
-            enum:
-              - pcm_s16le_16000
-            default: pcm_s16le_16000
-          description: Audio format specification. Currently supports 16-bit PCM at 16kHz sample rate.
+
+            console.log(response.data?.outputs?.[0]?.data);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X POST "https://api.together.xyz/v1/tci/execute" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "code": "print('\''Hello world!'\'')",
+                   "language": "python"
+                 }'
+      operationId: tci/execute
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ExecuteRequest'
+        description: Execute Request
+        required: false
       responses:
-        '101':
-          description: |
-            Switching Protocols - WebSocket connection established successfully.
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ExecuteResponse'
+          description: Execute Response
+  /tci/sessions:
+    get:
+      tags: ['Code Interpreter']
+      callbacks: {}
+      description: |
+        Lists all your currently active sessions.
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            # together v1 does not support this method
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
 
-            Error message format:
-            ```json
-            {
-              "type": "conversation.item.input_audio_transcription.failed",
-              "error": {
-                "message": "Error description",
-                "type": "invalid_request_error",
-                "param": null,
-                "code": "error_code"
-              }
-            }
-            ```
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
 
-components:
-  securitySchemes:
-    bearerAuth:
-      type: http
-      scheme: bearer
-      x-bearer-format: bearer
-      x-default: default
+            response = client.code_interpreter.sessions.list()
 
-  schemas:
+            for session in response.data.sessions:
+                print(session.id)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.codeInterpreter.sessions.list();
+
+            for (const session of response.data?.sessions ?? []) {
+              console.log(session.id);
+            }
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.codeInterpreter.sessions.list();
+
+            for (const session of response.data?.sessions ?? []) {
+              console.log(session.id);
+            }
+        - lang: Shell
+          label: cURL
+          source: |
+            curl "https://api.together.xyz/v1/tci/sessions" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+      operationId: sessions/list
+      parameters: []
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/SessionListResponse'
+          description: List Response
+  /batches:
+    get:
+      tags: ['Batches']
+      summary: List batch jobs
+      description: List all batch jobs for the authenticated user
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batches = client.batches.list_batches()
+
+            for batch in batches:
+                print(batch.id)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batches = client.batches.list()
+
+            for batch in batches:
+                print(batch.id)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batches = await client.batches.list();
+
+            console.log(batches);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batches = await client.batches.list();
+
+            console.log(batches);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl "https://api.together.xyz/v1/batches" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+      security:
+        - bearerAuth: []
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: '#/components/schemas/BatchJob'
+        '401':
+          description: Unauthorized
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '500':
+          description: Internal Server Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+    post:
+      tags: ['Batches']
+      summary: Create a batch job
+      description: Create a new batch job with the given input file and endpoint
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batch = client.batches.create_batch("file_id", endpoint="/v1/chat/completions")
+
+            print(batch.id)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batch = client.batches.create(input_file_id="file_id", endpoint="/v1/chat/completions")
+
+            print(batch.job)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batch = await client.batches.create({
+              endpoint: "/v1/chat/completions",
+              input_file_id: "file-id",
+            });
+
+            console.log(batch);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batch = await client.batches.create({
+              endpoint: "/v1/chat/completions",
+              input_file_id: "file-id",
+            });
+
+            console.log(batch);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X POST "https://api.together.xyz/v1/batches" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "endpoint": "/v1/chat/completions",
+                   "input_file_id": "file-id"
+                 }'
+      security:
+        - bearerAuth: []
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateBatchRequest'
+      responses:
+        '201':
+          description: Job created (potentially with warnings)
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchJobWithWarning'
+        '400':
+          description: Bad Request
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '401':
+          description: Unauthorized
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '429':
+          description: Too Many Requests
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '500':
+          description: Internal Server Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+
+  /batches/{id}:
+    get:
+      tags: ['Batches']
+      summary: Get a batch job
+      description: Get details of a batch job by ID
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batch = client.batches.get_batch("batch_id")
+
+            print(batch)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batch = client.batches.retrieve("batch_id")
+
+            print(batch)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batch = await client.batches.retrieve("batch-id");
+
+            console.log(batch);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batch = await client.batches.retrieve("batch-id");
+
+            console.log(batch);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl "https://api.together.xyz/v1/batches/ID" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+      security:
+        - bearerAuth: []
+      parameters:
+        - name: id
+          in: path
+          required: true
+          description: Job ID
+          schema:
+            type: string
+          example: 'batch_job_abc123def456'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchJob'
+        '400':
+          description: Bad Request
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '401':
+          description: Unauthorized
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '403':
+          description: Forbidden
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '404':
+          description: Not Found
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '500':
+          description: Internal Server Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+  /batches/{id}/cancel:
+    post:
+      tags: ['Batches']
+      summary: Cancel a batch job
+      description: Cancel a batch job by ID
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batch = client.batches.cancel_batch("batch_id")
+
+            print(batch)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batch = client.batches.cancel("batch_id")
+
+            print(batch)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batch = await client.batches.cancel("batch-id");
+
+            console.log(batch);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batch = await client.batches.cancel("batch-id");
+
+            console.log(batch);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X POST "https://api.together.xyz/v1/batches/ID/cancel" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+      security:
+        - bearerAuth: []
+      parameters:
+        - name: id
+          in: path
+          required: true
+          description: Job ID
+          schema:
+            type: string
+          example: 'batch_job_abc123def456'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchJob'
+        '400':
+          description: Bad Request
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '401':
+          description: Unauthorized
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '403':
+          description: Forbidden
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '404':
+          description: Not Found
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '500':
+          description: Internal Server Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+  /evaluation:
+    post:
+      tags:
+        - evaluation
+      summary: Create an evaluation job
+      operationId: createEvaluationJob
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.evaluation.create(
+                type="classify",
+                judge_model_name="meta-llama/Llama-3.1-70B-Instruct-Turbo",
+                judge_system_template="You are an expert evaluator...",
+                input_data_file_path="file-abc123",
+                labels=["good", "bad"],
+                pass_labels=["good"],
+                model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo"
+            )
+
+            print(response.workflow_id)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.evals.create(
+                type="classify",
+                parameters={
+                    "judge": {
+                        "model": "meta-llama/Llama-3.1-70B-Instruct-Turbo",
+                        "model_source": "serverless",
+                        "system_template": "You are an expert evaluator...",
+                    },
+                    "input_data_file_path": "file-abc123",
+                    "labels": ["good", "bad"],
+                    "pass_labels": ["good"],
+                    "model_to_evaluate": "meta-llama/Llama-3.1-8B-Instruct-Turbo",
+                },
+            )
+
+            print(response.workflow_id)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.create({
+              type: 'classify',
+              parameters: {
+                judge: {
+                  model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo',
+                  model_source: 'serverless',
+                  system_template: 'You are an expert evaluator...',
+                },
+                input_data_file_path: 'file-abc123',
+                labels: ['good', 'bad'],
+                pass_labels: ['good'],
+                model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo',
+              },
+            });
+
+            console.log(response.workflow_id);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.create({
+              type: 'classify',
+              parameters: {
+                judge: {
+                  model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo',
+                  model_source: 'serverless',
+                  system_template: 'You are an expert evaluator...',
+                },
+                input_data_file_path: 'file-abc123',
+                labels: ['good', 'bad'],
+                pass_labels: ['good'],
+                model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo',
+              },
+            });
+
+            console.log(response.workflow_id);
+
+
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/EvaluationTypedRequest"
+      responses:
+        "200":
+          description: "Evaluation job created successfully"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/EvaluationResponse"
+        "400":
+          description: "Invalid request format"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+        "500":
+          description: "Failed to create evaluation job"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+    get:
+      tags:
+        - evaluation
+      summary: Get all evaluation jobs
+      operationId: getAllEvaluationJobs
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            jobs = client.evaluation.list()
+
+            for job in jobs:
+                print(job.workflow_id)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.evals.list()
+
+            for job in response:
+                print(job.workflow_id)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.list();
+
+            for (const job of response) {
+              console.log(job.workflow_id);
+            }
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.list();
+
+            for (const job of response) {
+              console.log(job.workflow_id);
+            }
+      parameters:
+        - name: status
+          in: query
+          required: false
+          schema:
+            type: string
+            default: "pending"
+        - name: limit
+          in: query
+          required: false
+          schema:
+            type: integer
+            default: 10
+        - name: userId
+          in: query
+          required: false
+          description: "Admin users can specify a user ID to filter jobs. Pass empty string to get all jobs."
+          schema:
+            type: string
+      responses:
+        "200":
+          description: "Evaluation jobs retrieved successfully"
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: "#/components/schemas/EvaluationJob"
+        "400":
+          description: "Invalid request format"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+        "500":
+          description: "Error retrieving jobs from manager"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+  /evaluation/model-list:
+    get:
+      tags:
+        - evaluation
+      summary: Get model list
+      operationId: getModelList
+      parameters:
+        - name: model_source
+          in: query
+          required: false
+          schema:
+            type: string
+            default: "all"
+      responses:
+        "200":
+          description: "Model list retrieved successfully"
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  model_list:
+                    type: array
+                    items:
+                      type: string
+                      description: "The name of the model"
+        "400":
+          description: "Invalid request format"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+        "500":
+          description: "Error retrieving model list"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+  /evaluation/{id}:
+    get:
+      tags:
+        - evaluation
+      summary: Get evaluation job details
+      operationId: getEvaluationJobDetails
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.evaluation.retrieve('eval_id')
+
+            print(response)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.evals.retrieve('eval_id')
+
+            print(response)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.retrieve('eval_id');
+
+            console.log(response);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.retrieve('eval_id');
+
+            console.log(response);
+      parameters:
+        - name: id
+          in: path
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: "Evaluation job details retrieved successfully"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/EvaluationJob"
+        "404":
+          description: "Evaluation job not found"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+        "500":
+          description: "Failed to get evaluation job"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+
+  /evaluation/{id}/status:
+    get:
+      tags:
+        - evaluation
+      summary: Get evaluation job status and results
+      operationId: getEvaluationJobStatusAndResults
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.evaluation.status('eval_id')
+
+            print(response.status)
+            print(response.results)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.evals.status('eval_id')
+
+            print(response.status)
+            print(response.results)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.status('eval_id');
+
+            console.log(response.status);
+            console.log(response.results);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.status('eval_id');
+
+            console.log(response.status);
+            console.log(response.results);
+      parameters:
+        - name: id
+          in: path
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: "Evaluation job status and results retrieved successfully"
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  status:
+                    type: string
+                    description: "The status of the evaluation job"
+                    enum: ["completed", "error", "user_error", "running", "queued", "pending"]
+                  results:
+                    description: "The results of the evaluation job"
+                    oneOf:
+                      - $ref: "#/components/schemas/EvaluationClassifyResults"
+                      - $ref: "#/components/schemas/EvaluationScoreResults"
+                      - $ref: "#/components/schemas/EvaluationCompareResults"
+        "404":
+          description: "Evaluation job not found"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+        "500":
+          description: "Failed to get evaluation job"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+
+  /realtime:
+    get:
+      tags: ['Audio']
+      summary: Real-time audio transcription via WebSocket
+      description: |
+        Establishes a WebSocket connection for real-time audio transcription. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/realtime) for bidirectional streaming communication.
+
+        **Connection Setup:**
+        - Protocol: WebSocket (wss://)
+        - Authentication: Pass API key as Bearer token in Authorization header
+        - Parameters: Sent as query parameters (model, input_audio_format)
+
+        **Client Events:**
+        - `input_audio_buffer.append`: Send audio chunks as base64-encoded data
+          ```json
+          {
+            "type": "input_audio_buffer.append",
+            "audio": "<base64_encoded_audio_chunk>"
+          }
+          ```
+        - `input_audio_buffer.commit`: Signal end of audio stream
+          ```json
+          {
+            "type": "input_audio_buffer.commit"
+          }
+          ```
+
+        **Server Events:**
+        - `session.created`: Initial session confirmation (sent first)
+          ```json
+          {
+            "type": "session.created",
+            "session": {
+              "id": "session-id",
+              "object": "realtime.session",
+              "modalities": ["audio"],
+              "model": "openai/whisper-large-v3"
+            }
+          }
+          ```
+        - `conversation.item.input_audio_transcription.delta`: Partial transcription results
+          ```json
+          {
+            "type": "conversation.item.input_audio_transcription.delta",
+            "delta": "The quick brown"
+          }
+          ```
+        - `conversation.item.input_audio_transcription.completed`: Final transcription
+          ```json
+          {
+            "type": "conversation.item.input_audio_transcription.completed",
+            "transcript": "The quick brown fox jumps over the lazy dog"
+          }
+          ```
+        - `conversation.item.input_audio_transcription.failed`: Error occurred
+          ```json
+          {
+            "type": "conversation.item.input_audio_transcription.failed",
+            "error": {
+              "message": "Error description",
+              "type": "invalid_request_error",
+              "param": null,
+              "code": "invalid_api_key"
+            }
+          }
+          ```
+
+        **Error Codes:**
+        - `invalid_api_key`: Invalid API key provided (401)
+        - `missing_api_key`: Authorization header missing (401)
+        - `model_not_available`: Invalid or unavailable model (400)
+        - Unsupported audio format errors (400)
+
+      operationId: realtime-transcription
+      x-codeSamples:
+        - lang: Python
+          label: Python WebSocket Client
+          source: |
+            import asyncio
+            import websockets
+            import json
+            import base64
+            import os
+
+            async def transcribe_audio():
+                api_key = os.environ.get("TOGETHER_API_KEY")
+                url = "wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000"
+
+                headers = {
+                    "Authorization": f"Bearer {api_key}"
+                }
+
+                async with websockets.connect(url, additional_headers=headers) as ws:
+                    # Read audio file
+                    with open("audio.wav", "rb") as f:
+                        audio_data = f.read()
+
+                    # Send audio in chunks with delay to simulate real-time
+                    chunk_size = 8192
+                    bytes_per_second = 16000 * 2  # 16kHz * 2 bytes (16-bit)
+                    delay_per_chunk = chunk_size / bytes_per_second
+
+                    for i in range(0, len(audio_data), chunk_size):
+                        chunk = audio_data[i:i+chunk_size]
+                        base64_chunk = base64.b64encode(chunk).decode('utf-8')
+                        await ws.send(json.dumps({
+                            "type": "input_audio_buffer.append",
+                            "audio": base64_chunk
+                        }))
+                        # Simulate real-time streaming
+                        if i + chunk_size < len(audio_data):
+                            await asyncio.sleep(delay_per_chunk)
+
+                    # Commit the audio buffer
+                    await ws.send(json.dumps({
+                        "type": "input_audio_buffer.commit"
+                    }))
+
+                    # Receive transcription results
+                    async for message in ws:
+                        data = json.loads(message)
+                        if data["type"] == "conversation.item.input_audio_transcription.delta":
+                            print(f"Partial: {data['delta']}")
+                        elif data["type"] == "conversation.item.input_audio_transcription.completed":
+                            print(f"Final: {data['transcript']}")
+                            break
+                        elif data["type"] == "conversation.item.input_audio_transcription.failed":
+                            error = data.get("error", {})
+                            print(f"Error: {error.get('message')}")
+                            break
+
+            asyncio.run(transcribe_audio())
+        - lang: JavaScript
+          label: Node.js WebSocket Client
+          source: |
+            import WebSocket from 'ws';
+            import fs from 'fs';
+
+            const apiKey = process.env.TOGETHER_API_KEY;
+            const url = 'wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000';
+
+            const ws = new WebSocket(url, {
+              headers: {
+                'Authorization': `Bearer ${apiKey}`
+              }
+            });
+
+            ws.on('open', async () => {
+              console.log('WebSocket connection established!');
+
+              // Read audio file
+              const audioData = fs.readFileSync('audio.wav');
+
+              // Send audio in chunks with delay to simulate real-time
+              const chunkSize = 8192;
+              const bytesPerSecond = 16000 * 2;  // 16kHz * 2 bytes (16-bit)
+              const delayPerChunk = (chunkSize / bytesPerSecond) * 1000;  // Convert to ms
+
+              for (let i = 0; i < audioData.length; i += chunkSize) {
+                const chunk = audioData.slice(i, i + chunkSize);
+                const base64Chunk = chunk.toString('base64');
+                ws.send(JSON.stringify({
+                  type: 'input_audio_buffer.append',
+                  audio: base64Chunk
+                }));
+
+                // Simulate real-time streaming
+                if (i + chunkSize < audioData.length) {
+                  await new Promise(resolve => setTimeout(resolve, delayPerChunk));
+                }
+              }
+
+              // Commit audio buffer
+              ws.send(JSON.stringify({
+                type: 'input_audio_buffer.commit'
+              }));
+            });
+
+            ws.on('message', (data) => {
+              const message = JSON.parse(data.toString());
+
+              if (message.type === 'conversation.item.input_audio_transcription.delta') {
+                console.log(`Partial: ${message.delta}`);
+              } else if (message.type === 'conversation.item.input_audio_transcription.completed') {
+                console.log(`Final: ${message.transcript}`);
+                ws.close();
+              } else if (message.type === 'conversation.item.input_audio_transcription.failed') {
+                const errorMessage = message.error?.message ?? message.message ?? 'Unknown error';
+                console.error(`Error: ${errorMessage}`);
+                ws.close();
+              }
+            });
+
+            ws.on('error', (error) => {
+              console.error('WebSocket error:', error);
+            });
+      parameters:
+        - in: query
+          name: model
+          required: true
+          schema:
+            type: string
+            enum:
+              - openai/whisper-large-v3
+            default: openai/whisper-large-v3
+          description: The Whisper model to use for transcription
+        - in: query
+          name: input_audio_format
+          required: true
+          schema:
+            type: string
+            enum:
+              - pcm_s16le_16000
+            default: pcm_s16le_16000
+          description: Audio format specification. Currently supports 16-bit PCM at 16kHz sample rate.
+      responses:
+        '101':
+          description: |
+            Switching Protocols - WebSocket connection established successfully.
+
+            Error message format:
+            ```json
+            {
+              "type": "conversation.item.input_audio_transcription.failed",
+              "error": {
+                "message": "Error description",
+                "type": "invalid_request_error",
+                "param": null,
+                "code": "error_code"
+              }
+            }
+            ```
+
+components:
+  securitySchemes:
+    bearerAuth:
+      type: http
+      scheme: bearer
+      x-bearer-format: bearer
+      x-default: default
+
+  schemas:
+    api_v1.SignedURLResponse:
+      type: object
+      properties:
+        url:
+          type: string
+    files.AbortMultiPartRequest:
+      type: object
+      required:
+        - filename
+        - upload_id
+      properties:
+        filename:
+          type: string
+          description: Filename is the name of the file to upload. Can contain
+            alphanumeric characters, underscores, hyphens, spaces, periods, and
+            forward slashes with an optional file extension (e.g.,
+            "model_weights.bin", "data-2024.tar.gz", "path/to/file.bin")
+        upload_id:
+          type: string
+          description: UploadID is the unique identifier returned from the multi-part
+            upload initialization. Aborting will discard all uploaded parts
+    files.CompleteMultiPartRequest:
+      type: object
+      required:
+        - filename
+        - parts
+        - upload_id
+      properties:
+        filename:
+          type: string
+          description: Filename is the name of the file to upload. Can contain
+            alphanumeric characters, underscores, hyphens, spaces, periods, and
+            forward slashes with an optional file extension (e.g.,
+            "model_weights.bin", "data-2024.tar.gz", "path/to/file.bin")
+        parts:
+          type: array
+          description: Parts is the list of successfully uploaded parts with their ETags.
+            Must include all parts in order
+          minItems: 1
+          items:
+            $ref: "#/components/schemas/files.CompletedPart"
+        upload_id:
+          type: string
+          description: UploadID is the unique identifier returned from the multi-part
+            upload initialization
+    files.CompleteUploadResponse:
+      type: object
+      properties:
+        completed_at:
+          type: string
+          description: CompletedAt is the timestamp when the upload was completed
+        path:
+          type: string
+          description: Path is the storage path where the uploaded file can be accessed
+        size:
+          type: integer
+          description: Size is the total size of the uploaded file in bytes
+    files.CompletedPart:
+      type: object
+      properties:
+        etag:
+          type: string
+          description: ETag is the entity tag returned by the storage service after
+            uploading this part. This is used to verify the part's integrity
+        part_number:
+          type: integer
+          description: PartNumber is the sequential number of this part (starting from 1)
+    files.FileRequest:
+      type: object
+      required:
+        - filename
+      properties:
+        filename:
+          type: string
+          description: Filename is the name of the file to upload. Can contain
+            alphanumeric characters, underscores, hyphens, spaces, periods, and
+            forward slashes with an optional file extension (e.g.,
+            "model_weights.bin", "data-2024.tar.gz", "path/to/file.bin")
+    files.InitiateMultiPartRequest:
+      type: object
+      required:
+        - filename
+        - parts_count
+      properties:
+        filename:
+          type: string
+          description: Filename is the name of the file to upload. Can contain
+            alphanumeric characters, underscores, hyphens, spaces, periods, and
+            forward slashes with an optional file extension (e.g.,
+            "model_weights.bin", "data-2024.tar.gz", "path/to/file.bin")
+        parts_count:
+          type: integer
+          description: PartsCount is the number of parts to split the file into for
+            parallel upload. Must be between 1 and 10,000. Use multi-part upload
+            for files larger than 100MB for better performance and reliability
+          minimum: 1
+          maximum: 10000
+    files.MultiPartInitResponse:
+      type: object
+      properties:
+        part_upload_urls:
+          type: array
+          description: PartUploadURLs is the list of pre-signed URLs for uploading each
+            part in parallel
+          items:
+            $ref: "#/components/schemas/files.PartUploadURL"
+        path:
+          type: string
+          description: Path is the storage path where the file will be accessible after
+            upload completion
+        upload_id:
+          type: string
+          description: UploadID is the unique identifier for this multi-part upload
+            session. Use this when completing or aborting the upload
+    files.PartUploadURL:
+      type: object
+      properties:
+        headers:
+          type: object
+          description: Headers are the required HTTP headers to include when uploading
+            this part
+          additionalProperties:
+            type: string
+        method:
+          type: string
+          description: Method is the HTTP method to use for uploading this part (typically
+            "PUT")
+        part_number:
+          type: integer
+          description: PartNumber is the sequential number identifying this part (starting
+            from 1)
+        url:
+          type: string
+          description: URL is the pre-signed URL for uploading this specific part
+    files.UploadResponse:
+      type: object
+      properties:
+        path:
+          type: string
+          description: Path is the storage path where the file will be accessible after
+            upload (e.g., "user-files/model_weights.bin")
+        upload_url:
+          description: UploadURL contains the signed URL and metadata needed to upload the
+            file
+          allOf:
+            - $ref: "#/components/schemas/files.UploadURL"
+    files.UploadURL:
+      type: object
+      properties:
+        form_data:
+          type: object
+          description: FormData contains form fields required for multipart/form-data
+            uploads (if applicable)
+          additionalProperties:
+            type: string
+        headers:
+          type: object
+          description: Headers are the required HTTP headers to include in the upload
+            request
+          additionalProperties:
+            type: string
+        method:
+          type: string
+          description: Method is the HTTP method to use (typically "PUT" or "POST")
+        url:
+          type: string
+          description: URL is the pre-signed URL endpoint for uploading the file
+    ContainerStatus:
+      type: object
+      properties:
+        finishedAt:
+          type: string
+          description: FinishedAt is the timestamp when the container finished execution
+            (if terminated)
+        message:
+          type: string
+          description: Message provides a human-readable message with details about the
+            container's status
+        name:
+          type: string
+          description: Name is the name of the container
+        reason:
+          type: string
+          description: Reason provides a brief machine-readable reason for the container's
+            current status
+        startedAt:
+          type: string
+          description: StartedAt is the timestamp when the container started execution
+        status:
+          type: string
+          description: Status is the current state of the container (e.g., "Running",
+            "Terminated", "Waiting")
+    CreateDeploymentRequest:
+      properties:
+        args:
+          description: Args overrides the container's CMD. Provide as an array of
+            arguments (e.g., ["python", "app.py"])
+          items:
+            type: string
+          type: array
+        autoscaling:
+          additionalProperties:
+            type: string
+          description: 'Autoscaling configuration as key-value pairs. Example: {"metric":
+            "QueueBacklogPerWorker", "target": "10"} to scale based on queue
+            backlog'
+          type: object
+        command:
+          description: Command overrides the container's ENTRYPOINT. Provide as an array
+            (e.g., ["/bin/sh", "-c"])
+          items:
+            type: string
+          type: array
+        cpu:
+          description: CPU is the number of CPU cores to allocate per container instance
+            (e.g., 0.1 = 100 milli cores)
+          minimum: 0.1
+          type: number
+        description:
+          description: Description is an optional human-readable description of your
+            deployment
+          type: string
+        environment_variables:
+          description: EnvironmentVariables is a list of environment variables to set in
+            the container. Each must have a name and either a value or
+            value_from_secret
+          items:
+            $ref: "#/components/schemas/EnvironmentVariable"
+          type: array
+        gpu_count:
+          description: GPUCount is the number of GPUs to allocate per container instance.
+            Defaults to 0 if not specified
+          type: integer
+        gpu_type:
+          description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb").
+          enum:
+            - h100-80gb
+            - a100-80gb
+          type: string
+        health_check_path:
+          description: HealthCheckPath is the HTTP path for health checks (e.g.,
+            "/health"). If set, the platform will check this endpoint to
+            determine container health
+          type: string
+        image:
+          description: Image is the container image to deploy from registry.together.ai.
+          type: string
+        max_replicas:
+          description: MaxReplicas is the maximum number of container instances that can
+            be scaled up to. If not set, will be set to MinReplicas
+          type: integer
+        memory:
+          description: Memory is the amount of RAM to allocate per container instance in
+            GiB (e.g., 0.5 = 512MiB)
+          minimum: 0.1
+          type: number
+        min_replicas:
+          description: MinReplicas is the minimum number of container instances to run.
+            Defaults to 1 if not specified
+          type: integer
+        name:
+          description: Name is the unique identifier for your deployment. Must contain
+            only alphanumeric characters, underscores, or hyphens (1-100
+            characters)
+          maxLength: 100
+          minLength: 1
+          type: string
+        port:
+          description: Port is the container port your application listens on (e.g., 8080
+            for web servers). Required if your application serves traffic
+          type: integer
+        storage:
+          description: Storage is the amount of ephemeral disk storage to allocate per
+            container instance (e.g., 10 = 10GiB)
+          type: integer
+        termination_grace_period_seconds:
+          description: TerminationGracePeriodSeconds is the time in seconds to wait for
+            graceful shutdown before forcefully terminating the replica
+          type: integer
+        volumes:
+          description: Volumes is a list of volume mounts to attach to the container. Each
+            mount must reference an existing volume by name
+          items:
+            $ref: "#/components/schemas/v1.VolumeMount"
+          type: array
+      required:
+        - gpu_type
+        - image
+        - name
+      type: object
+    CreateSecretRequest:
+      type: object
+      required:
+        - name
+        - value
+      properties:
+        description:
+          type: string
+          description: Description is an optional human-readable description of the
+            secret's purpose (max 500 characters)
+          maxLength: 500
+        name:
+          type: string
+          description: Name is the unique identifier for the secret. Can contain
+            alphanumeric characters, underscores, hyphens, forward slashes, and
+            periods (1-100 characters)
+          minLength: 1
+          maxLength: 100
+        project_id:
+          type: string
+          description: ProjectID is ignored - the project is automatically determined from
+            your authentication
+        value:
+          type: string
+          description: Value is the sensitive data to store securely (e.g., API keys,
+            passwords, tokens). This value will be encrypted at rest
+          minLength: 1
+    CreateVolumeRequest:
+      type: object
+      required:
+        - content
+        - name
+        - type
+      properties:
+        content:
+          description: Content specifies the content configuration for this volume
+          allOf:
+            - $ref: "#/components/schemas/volumes.VolumeContent"
+        name:
+          type: string
+          description: Name is the unique identifier for the volume within the project
+        type:
+          description: Type is the volume type (currently only "readOnly" is supported)
+          allOf:
+            - $ref: "#/components/schemas/volumes.VolumeType"
+    DeploymentListResponse:
+      properties:
+        data:
+          description: Data is the array of deployment items
+          items:
+            $ref: "#/components/schemas/DeploymentResponseItem"
+          type: array
+        object:
+          description: Object is the type identifier for this response (always "list")
+          type: string
+      type: object
+    DeploymentLogs:
+      type: object
+      properties:
+        lines:
+          type: array
+          items:
+            type: string
+    DeploymentResponseItem:
+      properties:
+        args:
+          description: Args are the arguments passed to the container's command
+          items:
+            type: string
+          type: array
+        autoscaling:
+          additionalProperties:
+            type: string
+          description: Autoscaling contains autoscaling configuration parameters for this
+            deployment
+          type: object
+        command:
+          description: Command is the entrypoint command run in the container
+          items:
+            type: string
+          type: array
+        cpu:
+          description: CPU is the amount of CPU resource allocated to each replica in
+            cores (fractional value is allowed)
+          type: number
+        created_at:
+          description: CreatedAt is the ISO8601 timestamp when this deployment was created
+          type: string
+        description:
+          description: Description provides a human-readable explanation of the
+            deployment's purpose or content
+          type: string
+        desired_replicas:
+          description: DesiredReplicas is the number of replicas that the orchestrator is
+            targeting
+          type: integer
+        environment_variables:
+          description: EnvironmentVariables is a list of environment variables set in the
+            container
+          items:
+            $ref: "#/components/schemas/EnvironmentVariable"
+          type: array
+        gpu_count:
+          description: GPUCount is the number of GPUs allocated to each replica in this
+            deployment
+          type: integer
+        gpu_type:
+          description: GPUType specifies the type of GPU requested (if any) for this
+            deployment
+          enum:
+            - h100-80gb
+            - a100-80gb
+          type: string
+        health_check_path:
+          description: HealthCheckPath is the HTTP path used for health checks of the
+            application
+          type: string
+        id:
+          description: ID is the unique identifier of the deployment
+          type: string
+        image:
+          description: Image specifies the container image used for this deployment
+          type: string
+        max_replicas:
+          description: MaxReplicas is the maximum number of replicas to run for this
+            deployment
+          type: integer
+        memory:
+          description: Memory is the amount of memory allocated to each replica in GiB
+            (fractional value is allowed)
+          type: number
+        min_replicas:
+          description: MinReplicas is the minimum number of replicas to run for this
+            deployment
+          type: integer
+        name:
+          description: Name is the name of the deployment
+          type: string
+        object:
+          description: Object is the type identifier for this response (always "deployment")
+          type: string
+        port:
+          description: Port is the container port that the deployment exposes
+          type: integer
+        ready_replicas:
+          description: ReadyReplicas is the current number of replicas that are in the
+            Ready state
+          type: integer
+        replica_events:
+          additionalProperties:
+            $ref: "#/components/schemas/ReplicaEvent"
+          description: ReplicaEvents is a mapping of replica names or IDs to their status
+            events
+          type: object
+        status:
+          allOf:
+            - $ref: "#/components/schemas/DeploymentStatus"
+          description: Status represents the overall status of the deployment (e.g.,
+            Updating, Scaling, Ready, Failed)
+          enum:
+            - Updating
+            - Scaling
+            - Ready
+            - Failed
+        storage:
+          description: Storage is the amount of storage (in MB or units as defined by the
+            platform) allocated to each replica
+          type: integer
+        updated_at:
+          description: UpdatedAt is the ISO8601 timestamp when this deployment was last
+            updated
+          type: string
+        volumes:
+          description: Volumes is a list of volume mounts for this deployment
+          items:
+            $ref: "#/components/schemas/v1.VolumeMount"
+          type: array
+      type: object
+    DeploymentStatus:
+      type: string
+      enum:
+        - Updating
+        - Scaling
+        - Ready
+        - Failed
+      x-enum-varnames:
+        - DeploymentStatusUpdating
+        - DeploymentStatusScaling
+        - DeploymentStatusReady
+        - DeploymentStatusFailed
+    EnvironmentVariable:
+      type: object
+      required:
+        - name
+      properties:
+        name:
+          type: string
+          description: Name is the environment variable name (e.g., "DATABASE_URL"). Must
+            start with a letter or underscore, followed by letters, numbers, or
+            underscores
+        value:
+          type: string
+          description: Value is the plain text value for the environment variable. Use
+            this for non-sensitive values. Either Value or ValueFromSecret must
+            be set, but not both
+        value_from_secret:
+          type: string
+          description: ValueFromSecret references a secret by name or ID to use as the
+            value. Use this for sensitive values like API keys or passwords.
+            Either Value or ValueFromSecret must be set, but not both
+    ImageListResponse:
+      properties:
+        data:
+          description: Data is the array of image items
+          items:
+            $ref: "#/components/schemas/ImageResponseItem"
+          type: array
+        object:
+          description: Object is the type identifier for this response (always "list")
+          type: string
+      type: object
+    ImageResponseItem:
+      type: object
+      properties:
+        object:
+          type: string
+          description: Object is the type identifier for this response (always "image")
+        tag:
+          type: string
+          description: Tag is the image tag/version identifier (e.g., "latest", "v1.0.0")
+        url:
+          type: string
+          description: URL is the full registry URL for this image including tag (e.g.,
+            "registry.together.ai/project-id/repository:tag")
+    KubernetesEvent:
+      type: object
+      properties:
+        action:
+          type: string
+          description: Action is the action taken or reported by this event
+        count:
+          type: integer
+          description: Count is the number of times this event has occurred
+        first_seen:
+          type: string
+          description: FirstSeen is the timestamp when this event was first observed
+        last_seen:
+          type: string
+          description: LastSeen is the timestamp when this event was last observed
+        message:
+          type: string
+          description: Message is a human-readable description of the event
+        reason:
+          type: string
+          description: Reason is a brief machine-readable reason for this event (e.g.,
+            "Pulling", "Started", "Failed")
+    ListSecretsResponse:
+      properties:
+        data:
+          description: Data is the array of secret items
+          items:
+            $ref: "#/components/schemas/SecretResponseItem"
+          type: array
+        object:
+          description: Object is the type identifier for this response (always "list")
+          type: string
+      type: object
+    ListVolumesResponse:
+      type: object
+      properties:
+        data:
+          type: array
+          description: Data is the array of volume items
+          items:
+            $ref: "#/components/schemas/v1.VolumeResponseItem"
+        object:
+          type: string
+          description: Object is the type identifier for this response (always "list")
+    ReplicaEvent:
+      properties:
+        container_status:
+          allOf:
+            - $ref: "#/components/schemas/ContainerStatus"
+          description: ContainerStatus provides detailed status information about the
+            container within this replica
+        events:
+          description: Events is a list of Kubernetes events related to this replica for
+            troubleshooting
+          items:
+            $ref: "#/components/schemas/KubernetesEvent"
+          type: array
+        replica_completed_at:
+          description: ReplicaCompletedAt is the timestamp when the replica finished
+            execution
+          type: string
+        replica_marked_for_termination_at:
+          description: ReplicaMarkedForTerminationAt is the timestamp when the replica was
+            marked for termination
+          type: string
+        replica_ready_since:
+          description: ReplicaReadySince is the timestamp when the replica became ready to
+            serve traffic
+          type: string
+        replica_running_since:
+          description: ReplicaRunningSince is the timestamp when the replica entered the
+            running state
+          type: string
+        replica_started_at:
+          description: ReplicaStartedAt is the timestamp when the replica was created
+          type: string
+        replica_status:
+          description: ReplicaStatus is the current status of the replica (e.g.,
+            "Running", "Pending", "Failed")
+          type: string
+        replica_status_message:
+          description: ReplicaStatusMessage provides a human-readable message explaining
+            the replica's status
+          type: string
+        replica_status_reason:
+          description: ReplicaStatusReason provides a brief machine-readable reason for
+            the replica's status
+          type: string
+        scheduled_on_cluster:
+          description: ScheduledOnCluster identifies which cluster this replica is
+            scheduled on
+          type: string
+      type: object
+    RepositoryListResponse:
+      properties:
+        data:
+          description: Data is the array of repository items
+          items:
+            $ref: "#/components/schemas/RepositoryResponseItem"
+          type: array
+        object:
+          description: Object is the type identifier for this response (always "list")
+          type: string
+      type: object
+    RepositoryResponseItem:
+      type: object
+      properties:
+        id:
+          type: string
+          description: ID is the unique identifier for this repository (repository name
+            with slashes replaced by "___")
+        object:
+          type: string
+          description: Object is the type identifier for this response (always
+            "image-repository")
+        url:
+          type: string
+          description: URL is the full registry URL for this repository (e.g.,
+            "registry.together.ai/project-id/repository-name")
+    SecretResponseItem:
+      type: object
+      properties:
+        created_at:
+          type: string
+          description: CreatedAt is the ISO8601 timestamp when this secret was created
+        created_by:
+          type: string
+          description: CreatedBy is the identifier of the user who created this secret
+        description:
+          type: string
+          description: Description is a human-readable description of the secret's purpose
+        id:
+          type: string
+          description: ID is the unique identifier for this secret
+        last_updated_by:
+          type: string
+          description: LastUpdatedBy is the identifier of the user who last updated this
+            secret
+        name:
+          type: string
+          description: Name is the name/key of the secret
+        object:
+          type: string
+          description: Object is the type identifier for this response (always "secret")
+        updated_at:
+          type: string
+          description: UpdatedAt is the ISO8601 timestamp when this secret was last updated
+    UpdateDeploymentRequest:
+      properties:
+        args:
+          description: Args overrides the container's CMD. Provide as an array of
+            arguments (e.g., ["python", "app.py"])
+          items:
+            type: string
+          type: array
+        autoscaling:
+          additionalProperties:
+            type: string
+          description: 'Autoscaling configuration as key-value pairs. Example: {"metric":
+            "QueueBacklogPerWorker", "target": "10"} to scale based on queue
+            backlog'
+          type: object
+        command:
+          description: Command overrides the container's ENTRYPOINT. Provide as an array
+            (e.g., ["/bin/sh", "-c"])
+          items:
+            type: string
+          type: array
+        cpu:
+          description: CPU is the number of CPU cores to allocate per container instance
+            (e.g., 0.1 = 100 milli cores)
+          minimum: 0.1
+          type: number
+        description:
+          description: Description is an optional human-readable description of your
+            deployment
+          type: string
+        environment_variables:
+          description: EnvironmentVariables is a list of environment variables to set in
+            the container. This will replace all existing environment variables
+          items:
+            $ref: "#/components/schemas/EnvironmentVariable"
+          type: array
+        gpu_count:
+          description: GPUCount is the number of GPUs to allocate per container instance
+          type: integer
+        gpu_type:
+          description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb")
+          enum:
+            - h100-80gb
+            - " a100-80gb"
+          type: string
+        health_check_path:
+          description: HealthCheckPath is the HTTP path for health checks (e.g.,
+            "/health"). Set to empty string to disable health checks
+          type: string
+        image:
+          description: Image is the container image to deploy from registry.together.ai.
+          type: string
+        max_replicas:
+          description: MaxReplicas is the maximum number of replicas that can be scaled up
+            to.
+          type: integer
+        memory:
+          description: Memory is the amount of RAM to allocate per container instance in
+            GiB (e.g., 0.5 = 512MiB)
+          minimum: 0.1
+          type: number
+        min_replicas:
+          description: MinReplicas is the minimum number of replicas to run
+          type: integer
+        name:
+          description: Name is the new unique identifier for your deployment. Must contain
+            only alphanumeric characters, underscores, or hyphens (1-100
+            characters)
+          maxLength: 100
+          minLength: 1
+          type: string
+        port:
+          description: Port is the container port your application listens on (e.g., 8080
+            for web servers)
+          type: integer
+        storage:
+          description: Storage is the amount of ephemeral disk storage to allocate per
+            container instance (e.g., 10 = 10GiB)
+          type: integer
+        termination_grace_period_seconds:
+          description: TerminationGracePeriodSeconds is the time in seconds to wait for
+            graceful shutdown before forcefully terminating the replica
+          type: integer
+        volumes:
+          description: Volumes is a list of volume mounts to attach to the container. This
+            will replace all existing volumes
+          items:
+            $ref: "#/components/schemas/v1.VolumeMount"
+          type: array
+      type: object
+    UpdateSecretRequest:  # declares no "required" list, so each property may be omitted
+      type: object
+      properties:
+        description:
+          type: string
+          maxLength: 500
+          description: Description is an optional human-readable description of
+            the secret's purpose (max 500 characters)
+        name:
+          type: string
+          minLength: 1
+          maxLength: 100
+          description: Name is the new unique identifier for the secret. Can
+            contain alphanumeric characters, underscores, hyphens, forward
+            slashes, and periods (1-100 characters)
+        project_id:
+          type: string
+          description: ProjectID is ignored - the project is automatically
+            determined from your authentication
+        value:
+          type: string
+          minLength: 1  # when supplied, the value must be non-empty
+          description: Value is the new sensitive data to store securely.
+            Updating this will replace the existing secret value
+    UpdateVolumeRequest:  # declares no "required" list, so each property may be omitted
+      type: object
+      properties:
+        content:
+          description: Content specifies the new content that will be preloaded
+            to this volume
+          allOf:  # single-entry allOf: the $ref is wrapped while "description" stays a sibling key
+            - $ref: '#/components/schemas/volumes.VolumeContent'
+        name:
+          type: string
+          description: Name is the new unique identifier for the volume within the project
+        type:
+          description: Type is the new volume type (currently only "readOnly" is supported)
+          allOf:
+            - $ref: '#/components/schemas/volumes.VolumeType'
+    VolumeMount:  # both properties are listed under "required"
+      type: object
+      required:
+        - mount_path
+        - name
+      properties:
+        mount_path:
+          type: string
+          description: MountPath is the path in the container where the volume
+            will be mounted (e.g., "/data")
+        name:
+          type: string
+          description: Name is the name of the volume to mount. Must reference an
+            existing volume by name or ID
+    VolumeResponseItem:  # declares no "required" list
+      type: object
+      properties:
+        content:
+          description: Content specifies the content that will be preloaded to this volume
+          allOf:
+            - $ref: '#/components/schemas/volumes.VolumeContent'
+        created_at:
+          type: string  # plain string: no "format" keyword is declared for this timestamp
+          description: CreatedAt is the ISO8601 timestamp when this volume was created
+        id:
+          type: string
+          description: ID is the unique identifier for this volume
+        name:
+          type: string
+          description: Name is the name of the volume
+        object:
+          type: string
+          description: Object is the type identifier for this response (always "volume")
+        type:
+          description: Type is the volume type (e.g., "readOnly")
+          allOf:
+            - $ref: '#/components/schemas/volumes.VolumeType'
+        updated_at:
+          type: string  # plain string: no "format" keyword is declared for this timestamp
+          description: UpdatedAt is the ISO8601 timestamp when this volume was last updated
+    volumes.VolumeContent:  # declares no "required" list, so each property may be omitted
+      type: object
+      properties:
+        source_prefix:
+          type: string
+          example: models/
+          description: SourcePrefix is the file path prefix for the content to
+            be preloaded into the volume
+        type:
+          type: string
+          enum:  # single permitted value
+            - files
+          example: files
+          description: Type is the content type (currently only "files" is supported
+            which allows preloading files uploaded via Files API into the volume)
+    volumes.VolumeType:  # string enum with a single member
+      type: string
+      enum:
+        - readOnly
+      x-enum-varnames:  # vendor extension, one entry per enum member - presumably codegen constant names, confirm
+        - VolumeTypeReadOnly
     ListVoicesResponse:
       description: Response containing a list of models and their available voices.
       type: object