# openapi.yaml

# Together REST API, described as an OpenAPI 3.1 document.
openapi: 3.1.0
info:
  title: Together APIs
  description: >-
    The Together REST API. Please see https://docs.together.ai for more details.
  # Quoted so the version stays a string.
  version: '2.0.0'
  termsOfService: https://www.together.ai/terms-of-service
  contact:
    name: Together Support
    url: https://www.together.ai/contact
  license:
    name: MIT
    url: https://github.com/togethercomputer/openapi/blob/main/LICENSE
servers:
  - url: https://api.together.xyz/v1
# Top-level security requirement naming the bearerAuth scheme (no scopes).
security:
  - bearerAuth: []
paths:
  # POST /chat/completions: query a chat model.
  /chat/completions:
    post:
      tags:
        - Chat
      summary: Create chat completion
      description: Query a chat model.
      operationId: chat-completions
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
      responses:
        # Success is documented with two media types: JSON and an event stream.
        '200':
          description: '200'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
            text/event-stream:
              schema:
                $ref: '#/components/schemas/ChatCompletionStream'
        # Every error status below carries an ErrorData JSON body.
        '400':
          description: BadRequest
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '404':
          description: NotFound
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '429':
          description: RateLimit
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '503':
          description: Overloaded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '504':
          description: Timeout
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      deprecated: false
  # POST /completions: text completion for a prompt.
  /completions:
    post:
      tags: ["Completion"]
      summary: Create completion
      description: Query a language, code, or image model.
      operationId: completions
      requestBody:
        # CompletionRequest declares required fields (model, prompt), so the
        # body itself is mandatory; requestBody.required defaults to false.
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/CompletionRequest"
      responses:
        # Success is documented with two media types: JSON and an event stream.
        "200":
          description: "200"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/CompletionResponse"
            text/event-stream:
              schema:
                $ref: "#/components/schemas/CompletionStream"
        # Every error status below carries an ErrorData JSON body.
        "400":
          description: "BadRequest"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorData"
        "401":
          description: "Unauthorized"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorData"
        "404":
          description: "NotFound"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorData"
        "429":
          description: "RateLimit"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorData"
        "503":
          description: "Overloaded"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorData"
        "504":
          description: "Timeout"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorData"
      deprecated: false
  # POST /embeddings: embed a given string of text.
  /embeddings:
    post:
      tags:
        - Embeddings
      summary: Create embedding
      description: Query an embedding model for a given string of text.
      operationId: embeddings
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EmbeddingsRequest'
      responses:
        '200':
          description: '200'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EmbeddingsResponse'
        # Every error status below carries an ErrorData JSON body.
        '400':
          description: BadRequest
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '404':
          description: NotFound
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '429':
          description: RateLimit
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '503':
          description: Overloaded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '504':
          description: Timeout
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      deprecated: false
  # GET /models: list the available models. No request body or parameters.
  /models:
    get:
      tags:
        - Models
      summary: List all models
      description: Lists all of Together's open-source models
      operationId: models
      responses:
        '200':
          description: '200'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ModelInfoList'
        # Error statuses carry an ErrorData JSON body. Unlike the inference
        # endpoints in this file, no 503 response is declared here.
        '400':
          description: BadRequest
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '404':
          description: NotFound
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '429':
          description: RateLimit
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '504':
          description: Timeout
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      deprecated: false
  # POST /images/generations: generate an image from a prompt.
  # The request schema is declared inline rather than under components.
  /images/generations:
    post:
      tags:
        - Images
      summary: Create image
      description: Use an image model to generate an image for a given prompt.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required: [prompt, model]
              properties:
                prompt:
                  type: string
                  description: A description of the desired images. Maximum length varies by model.
                  example: cat floating in space, cinematic
                model:
                  type: string
                  description: >
                    The model to use for image generation.<br>
                    <br>
                    [See all of Together AI's image models](https://docs.together.ai/docs/serverless-models#image-models)
                  example: black-forest-labs/FLUX.1-schnell
                  # Known model ids are enumerated; the second branch still
                  # admits any other string.
                  anyOf:
                    - type: string
                      enum:
                        - black-forest-labs/FLUX.1-schnell-Free
                        - black-forest-labs/FLUX.1-schnell
                        - black-forest-labs/FLUX.1.1-pro
                    - type: string
                steps:
                  type: integer
                  description: Number of generation steps.
                  default: 20
                image_url:
                  type: string
                  description: URL of an image to use for image models that support it.
                seed:
                  type: integer
                  description: Seed used for generation. Can be used to reproduce image generations.
                n:
                  type: integer
                  description: Number of image results to generate.
                  default: 1
                height:
                  type: integer
                  description: Height of the image to generate in number of pixels.
                  default: 1024
                width:
                  type: integer
                  description: Width of the image to generate in number of pixels.
                  default: 1024
                negative_prompt:
                  type: string
                  description: The prompt or prompts not to guide the image generation.
                response_format:
                  type: string
                  description: Format of the image response. Can be either a base64 string or a URL.
                  enum: [base64, url]
      responses:
        '200':
          description: Image generated successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ImageResponse'
  # GET /files: metadata for all uploaded data files.
  /files:
    get:
      tags:
        - Files
      summary: List all files
      description: List the metadata for all uploaded data files.
      responses:
        '200':
          description: List of files
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FileList'
  # /files/{id}: fetch (GET) or remove (DELETE) a single uploaded file.
  # Both operations take the same required `id` path parameter.
  /files/{id}:
    get:
      tags:
        - Files
      summary: List file
      description: List the metadata for a single uploaded data file.
      parameters:
        - in: path
          name: id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: File retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FileResponse'
    delete:
      tags:
        - Files
      summary: Delete a file
      description: Delete a previously uploaded data file.
      parameters:
        - in: path
          name: id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: File deleted successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FileDeleteResponse'
  # GET /files/{id}/content: contents of a single uploaded data file.
  /files/{id}/content:
    get:
      tags:
        - Files
      summary: Get file contents
      description: Get the contents of a single uploaded data file.
      parameters:
        - in: path
          name: id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: File content retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FileObject'
        # The only error status declared for this operation.
        '500':
          description: Internal Server Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  # /fine-tunes: create a fine-tuning job (POST) or list all jobs (GET).
  # The POST request schema is declared inline rather than under components.
  /fine-tunes:
    post:
      tags: ["Fine-tuning"]
      summary: Create job
      description: Use a model to create a fine-tuning job.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - training_file
                - model
              properties:
                training_file:
                  type: string
                  description: File-ID of a training file uploaded to the Together API
                validation_file:
                  type: string
                  description: File-ID of a validation file uploaded to the Together API
                model:
                  type: string
                  description: Name of the base model to run fine-tune job on
                n_epochs:
                  type: integer
                  default: 1
                  description: Number of epochs for fine-tuning
                n_checkpoints:
                  type: integer
                  default: 1
                  description: Number of checkpoints to save during fine-tuning
                n_evals:
                  type: integer
                  default: 0
                  description: Number of evaluations to be run on a given validation set during training
                batch_size:
                  type: integer
                  default: 32
                  description: Batch size for fine-tuning
                learning_rate:
                  type: number
                  format: float
                  default: 0.00001
                  description: Learning rate multiplier to use for training
                # No `default` here: the previous `default: none` was a plain
                # string and could not validate against an object schema.
                lr_scheduler:
                  type: object
                  $ref: "#/components/schemas/LRScheduler"
                warmup_ratio:
                  type: number
                  format: float
                  default: 0.0
                  description: The percent of steps at the start of training to linearly increase the learning rate.
                max_grad_norm:
                  type: number
                  format: float
                  default: 1.0
                  description: Max gradient norm to be used for gradient clipping. Set to 0 to disable.
                weight_decay:
                  type: number
                  format: float
                  default: 0.0
                  description: Weight decay
                suffix:
                  type: string
                  description: Suffix that will be added to your fine-tuned model name
                wandb_api_key:
                  type: string
                  description: API key for Weights & Biases integration
                wandb_base_url:
                  type: string
                  description: The base URL of a dedicated Weights & Biases instance.
                wandb_project_name:
                  type: string
                  description: The Weights & Biases project for your run. If not specified, will use `together` as the project name.
                wandb_name:
                  type: string
                  description: The Weights & Biases name for your run.
                # Either a boolean or the literal string "auto". There is no
                # sibling `type`, because `type: boolean` would reject the
                # "auto" branch and the default below.
                train_on_inputs:
                  oneOf:
                    - type: boolean
                    - type: string
                      enum:
                        - auto
                  default: auto
                  description: Whether to mask the user messages in conversational data or prompts in instruction data.
                training_type:
                  type: object
                  oneOf:
                    - $ref: "#/components/schemas/FullTrainingType"
                    - $ref: "#/components/schemas/LoRATrainingType"
      responses:
        "200":
          description: Fine-tuning job initiated successfully
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/FinetuneResponse"
    get:
      tags: ["Fine-tuning"]
      summary: List all jobs
      description: List the metadata for all fine-tuning jobs.
      responses:
        "200":
          description: List of fine-tune jobs
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/FinetuneList"
  # GET /fine-tunes/{id}: metadata for a single fine-tuning job.
  /fine-tunes/{id}:
    get:
      tags:
        - Fine-tuning
      summary: List job
      description: List the metadata for a single fine-tuning job.
      parameters:
        - in: path
          name: id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: Fine-tune job details retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FinetuneResponse'
  # GET /fine-tunes/{id}/events: events for a single fine-tuning job.
  /fine-tunes/{id}/events:
    get:
      tags:
        - Fine-tuning
      summary: List job events
      description: List the events for a single fine-tuning job.
      parameters:
        - in: path
          name: id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: List of fine-tune events
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FinetuneListEvents'
  # GET /finetune/download: download a fine-tuned model or checkpoint.
  # All inputs are query parameters; only `ft_id` is marked required.
  /finetune/download:
    get:
      tags:
        - Fine-tuning
      summary: Download model
      description: Download a compressed fine-tuned model or checkpoint to local disk.
      parameters:
        - name: ft_id
          in: query
          required: true
          description: Fine-tune ID to download. A string that starts with `ft-`.
          schema:
            type: string
        - name: checkpoint_step
          in: query
          required: false
          description: Specifies step number for checkpoint to download. Ignores `checkpoint` value if set.
          schema:
            type: integer
        # No explicit `required` key on this parameter.
        - name: checkpoint
          in: query
          description: Specifies checkpoint type to download - `merged` vs `adapter`. This field is required if the checkpoint_step is not set.
          schema:
            type: string
            enum: [merged, adapter]
        - name: output
          in: query
          required: false
          description: Specifies output file name for downloaded model. Defaults to `$PWD/{model_name}.{extension}`.
          schema:
            type: string
      responses:
        '200':
          description: Successfully downloaded the fine-tuned model or checkpoint.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FinetuneDownloadResult'
        # The error responses below declare no body schema.
        '400':
          description: Invalid request parameters.
        '404':
          description: Fine-tune ID not found.
  # POST /fine-tunes/{id}/cancel: cancel a running fine-tuning job.
  /fine-tunes/{id}/cancel:
    post:
      tags:
        - Fine-tuning
      summary: Cancel job
      description: Cancel a currently running fine-tuning job.
      parameters:
        - name: id
          in: path
          required: true
          description: Fine-tune ID to cancel. A string that starts with `ft-`.
          schema:
            type: string
      responses:
        '200':
          description: Successfully cancelled the fine-tuning job.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FinetuneResponse'
        # The error responses below declare no body schema.
        '400':
          description: Invalid request parameters.
        '404':
          description: Fine-tune ID not found.
  # POST /rerank: rank supplied documents against a query.
  /rerank:
    post:
      tags: ["Rerank"]
      summary: Create a rerank request
      description: Query a reranker model
      operationId: rerank
      requestBody:
        # RerankRequest declares required fields (model, query, documents), so
        # the body itself is mandatory; requestBody.required defaults to false.
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/RerankRequest"
      responses:
        "200":
          description: "200"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/RerankResponse"
        # Every error status below carries an ErrorData JSON body.
        "400":
          description: "BadRequest"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorData"
        "401":
          description: "Unauthorized"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorData"
        "404":
          description: "NotFound"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorData"
        "429":
          description: "RateLimit"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorData"
        "503":
          description: "Overloaded"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorData"
        "504":
          description: "Timeout"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorData"
      deprecated: false
components:
  # Security schemes available to operations in this document.
  securitySchemes:
    # HTTP bearer authentication, named by the top-level `security` entry.
    bearerAuth:
      scheme: bearer
      type: http
      # NOTE(review): the x-* keys are vendor extensions; the tool that
      # consumes them is not visible in this file.
      x-bearer-format: bearer
      x-default: default

  schemas:
    # Request body for POST /rerank. Unknown top-level properties are rejected
    # (additionalProperties: false).
    RerankRequest:
      type: object
      properties:
        model:
          type: string
          description: >
            The model to be used for the rerank request.<br>
            <br>
            [See all of Together AI's rerank models](https://docs.together.ai/docs/serverless-models#rerank-models)
          example: Salesforce/Llama-Rank-V1
          # The enumerated id uses the same casing as the example above; the
          # second branch still admits any other string.
          anyOf:
            - type: string
              enum:
                - Salesforce/Llama-Rank-V1
            - type: string
        query:
          type: string
          description: The search query to be used for ranking.
          example: What animals can I find near Peru?
        documents:
          description: List of documents, which can be either strings or objects.
          # Either an array of free-form objects or an array of strings.
          oneOf:
            - type: array
              items:
                type: object
                additionalProperties: true
            - type: array
              items:
                type: string
                example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
          example:
            - title: "Llama"
              text: "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
            - title: "Panda"
              text: "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
            - title: "Guanaco"
              text: "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
            - title: "Wild Bactrian camel"
              text: "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
        top_n:
          type: integer
          description: The number of top results to return.
          example: 2
        return_documents:
          type: boolean
          description: Whether to return supplied documents with the response.
          example: true
        rank_fields:
          type: array
          items:
            type: string
          description: List of keys in the JSON Object document to rank by. Defaults to use all supplied keys for ranking.
          example: ["title", "text"]
      required:
        - model
        - query
        - documents
      additionalProperties: false

    # Response body for POST /rerank.
    RerankResponse:
      type: object
      required:
        - object
        - model
        - results
      properties:
        object:
          type: string
          description: Object type
          enum:
            - rerank
          example: rerank
        id:
          type: string
          description: Request ID
          example: 9dfa1a09-5ebc-4a40-970f-586cb8f4ae47
        model:
          type: string
          description: The model to be used for the rerank request.
          example: salesforce/turboranker-0.8-3778-6328
        results:
          type: array
          items:
            type: object
            required: [index, relevance_score, document]
            properties:
              index:
                type: integer
              relevance_score:
                type: number
              document:
                type: object
                properties:
                  text:
                    # OpenAPI 3.1 has no `nullable` keyword; a nullable string
                    # is expressed as a type array.
                    type: [string, 'null']
          # In this example each document text is itself a JSON-encoded string.
          example:
            - index: 0
              relevance_score: 0.29980177813003117
              document:
                text: '{"title":"Llama","text":"The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."}'
            - index: 2
              relevance_score: 0.2752447527354349
              document:
                text: '{"title":"Guanaco","text":"The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."}'
        usage:
          $ref: "#/components/schemas/UsageData"
          example:
            prompt_tokens: 1837
            completion_tokens: 0
            total_tokens: 1837

    # Error envelope used by the error responses in this document.
    ErrorData:
      type: object
      required:
        - error
      properties:
        error:
          type: object
          properties:
            message:
              type: string
            type:
              type: string
            # `param` and `code` must be present but may be null. OpenAPI 3.1
            # has no `nullable` keyword, so null is listed in the type array,
            # which also makes `default: null` a valid default.
            param:
              type: [string, 'null']
              default: null
            code:
              type: [string, 'null']
              default: null
          required:
            - type
            - message
            - param
            - code

    # String enum of finish reasons; used as `finish_reason` in
    # CompletionChoicesData.
    FinishReason:
      type: string
      enum: [stop, eos, length, tool_calls, function_call]

    # Log-probability data: three arrays (token ids, token strings, logprobs).
    LogprobsPart:
      type: object
      properties:
        token_ids:
          description: List of token IDs corresponding to the logprobs
          type: array
          items:
            type: number
        tokens:
          description: List of token strings
          type: array
          items:
            type: string
        token_logprobs:
          description: List of token log probabilities
          type: array
          items:
            type: number

    # Array of prompt entries, each with its text and optional logprobs.
    PromptPart:
      type: array
      items:
        type: object
        properties:
          text:
            type: string
            example: '<s>[INST] What is the capital of France? [/INST]'
          logprobs:
            $ref: '#/components/schemas/LogprobsPart'

    # Token accounting for a request. The value may be null; OpenAPI 3.1 has no
    # `nullable` keyword, so null is listed in the type array instead.
    UsageData:
      type: [object, 'null']
      properties:
        prompt_tokens:
          type: integer
        completion_tokens:
          type: integer
        total_tokens:
          type: integer
      required:
        - prompt_tokens
        - completion_tokens
        - total_tokens

    # Array of completion choices; used as `choices` in CompletionResponse.
    CompletionChoicesData:
      type: array
      items:
        type: object
        properties:
          text:
            type: string
            example: The capital of France is Paris. It's located in the north-central part of the country and is one of the most populous and visited cities in the world, known for its iconic landmarks like the Eiffel Tower, Louvre Museum, Notre-Dame Cathedral, and more. Paris is also the capital of the Île-de-France region and is a major global center for art, fashion, gastronomy, and culture.
          seed:
            type: integer
          finish_reason:
            $ref: '#/components/schemas/FinishReason'
          logprobs:
            $ref: '#/components/schemas/LogprobsPart'
            type: object

    # Request body for POST /completions. Only `model` and `prompt` are
    # required; the remaining properties are optional.
    CompletionRequest:
      type: object
      required:
        - model
        - prompt
      properties:
        prompt:
          type: string
          description: A string providing context for the model to complete.
          example: <s>[INST] What is the capital of France? [/INST]
        model:
          type: string
          description: >
            The name of the model to query.<br>
            <br>
            [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#chat-models)
          example: mistralai/Mixtral-8x7B-Instruct-v0.1
          # Known model ids are enumerated; the second branch still admits any
          # other string.
          anyOf:
            - type: string
              enum:
                - meta-llama/Llama-2-70b-hf
                - mistralai/Mistral-7B-v0.1
                - mistralai/Mixtral-8x7B-v0.1
                - Meta-Llama/Llama-Guard-7b
            - type: string
        max_tokens:
          type: integer
          description: The maximum number of tokens to generate.
        stop:
          type: array
          description: A list of string sequences that will truncate (stop) inference text output. For example, "</s>" will stop generation as soon as the model generates the given token.
          items:
            type: string
        temperature:
          type: number
          description: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate for question answering or summarization. A value closer to 1 introduces more randomness in the output.
          format: float
        top_p:
          type: number
          description: A percentage (also called the nucleus parameter) that's used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities. It specifies a probability threshold below which all less likely tokens are filtered out. This technique helps maintain diversity and generate more fluent and natural-sounding text.
          format: float
        top_k:
          type: integer
          description: An integer that's used to limit the number of choices for the next predicted word or token. It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence. This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options.
          format: int32
        repetition_penalty:
          type: number
          description: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
          format: float
        stream:
          type: boolean
          description: "If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results."
        # Constrained to 0 or 1 by the minimum/maximum below.
        logprobs:
          type: integer
          minimum: 0
          maximum: 1
          description: Number of most likely tokens for which to return log probabilities at each token position.
        echo:
          type: boolean
          description: If true, the response will contain the prompt. Can be used with `logprobs` to return prompt logprobs.
        n:
          type: integer
          description: The number of completions to generate for each prompt.
          minimum: 1
          maximum: 128
        safety_model:
          type: string
          description: The name of the moderation model used to validate tokens. Choose from the available moderation models found [here](https://docs.together.ai/docs/inference-models#moderation-models).
          example: "safety_model_name"
          anyOf:
            - type: string
              enum:
                - Meta-Llama/Llama-Guard-7b
            - type: string
        min_p:
          type: number
          description: A number between 0 and 1 that can be used as an alternative to top-p and top-k.
          format: float
        presence_penalty:
          type: number
          description: A number between -2.0 and 2.0 where a positive value increases the likelihood of a model talking about new topics.
          format: float
        frequency_penalty:
          type: number
          description: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned.
          format: float
        # Free-form map whose values are numeric biases; the keys are strings.
        logit_bias:
          type: object
          additionalProperties:
            type: number
            format: float
          description: Adjusts the likelihood of specific tokens appearing in the generated output.
          example: { "1024": -10.5, "105": 21.4 }
        seed:
          type: integer
          description: Seed value for reproducibility.
          example: 42
    # Completion result payload. Every field except `prompt` is required.
    CompletionResponse:
      type: object
      required: [id, choices, usage, created, model, object]
      properties:
        id:
          type: string
        choices:
          $ref: '#/components/schemas/CompletionChoicesData'
        prompt:
          $ref: '#/components/schemas/PromptPart'
        usage:
          $ref: '#/components/schemas/UsageData'
        created:
          type: integer
        model:
          type: string
        # Constant marker: the only allowed value is `text_completion`.
        object:
          type: string
          enum: [text_completion]

    # One streamed item: either a completion event or the stream sentinel.
    CompletionStream:
      oneOf:
        - $ref: '#/components/schemas/CompletionEvent'
        - $ref: '#/components/schemas/StreamSentinel'

    # Envelope for a streamed completion chunk; the chunk sits under `data`.
    CompletionEvent:
      type: object
      properties:
        data:
          $ref: '#/components/schemas/CompletionChunk'
      required:
        - data

    # A single streamed completion chunk.
    # `usage` and `finish_reason` are required keys whose value may be null.
    # OpenAPI 3.1 has no `nullable` keyword, so null is modelled as an explicit
    # `type: "null"` alternative next to the referenced schema.
    CompletionChunk:
      type: object
      required: [id, token, choices, usage, finish_reason]
      properties:
        id:
          type: string
        token:
          $ref: "#/components/schemas/CompletionToken"
        choices:
          title: CompletionChoices
          type: array
          items:
            $ref: "#/components/schemas/CompletionChoice"
        usage:
          anyOf:
            - $ref: "#/components/schemas/UsageData"
            - type: "null"
        seed:
          type: integer
        finish_reason:
          anyOf:
            - $ref: "#/components/schemas/FinishReason"
            - type: "null"

    # One entry of the `choices` array of a CompletionChunk.
    CompletionChoice:
      type: object
      required: [index]
      properties:
        # `index` is listed as required, so it needs a property definition.
        # Typed as integer to match `index` in the chat choice schemas.
        index:
          type: integer
        text:
          type: string

    # Token record: numeric id, text, log probability and a `special` flag.
    # All four fields are required.
    CompletionToken:
      type: object
      properties:
        id:
          type: integer
        text:
          type: string
        logprob:
          type: number
        special:
          type: boolean
      required:
        - id
        - text
        - logprob
        - special

    # Array of choices carried by ChatCompletionResponse.
    ChatCompletionChoicesData:
      type: array
      items:
        type: object
        properties:
          text:
            type: string
          index:
            type: integer
          seed:
            type: integer
          finish_reason:
            $ref: "#/components/schemas/FinishReason"
          message:
            $ref: "#/components/schemas/ChatCompletionMessage"
          # May be null. OpenAPI 3.1 has no `nullable` keyword, so null is an
          # explicit alternative next to the LogprobsPart reference.
          logprobs:
            anyOf:
              - $ref: "#/components/schemas/LogprobsPart"
              - type: "null"
    # Assistant message attached to a chat completion choice.
    ChatCompletionMessage:
      type: object
      required: [role, content]
      properties:
        # Required key whose value may be null. Expressed as a type array
        # because the 3.0-only `nullable` keyword is not part of OpenAPI 3.1.
        content:
          type: ["string", "null"]
        role:
          type: string
          enum: [assistant]
        tool_calls:
          type: array
          items:
            $ref: "#/components/schemas/ToolChoice"
        # Deprecated; when present both `arguments` and `name` are required.
        function_call:
          type: object
          deprecated: true
          required: [arguments, name]
          properties:
            arguments:
              type: string
            name:
              type: string
    # Tool definition. `type` is fixed to "function"; inside `function`,
    # only `name` is required.
    ChatCompletionTool:
      type: object
      properties:
        type:
          type: string
          enum:
            - "function"
        function:
          type: object
          properties:
            description:
              type: string
            name:
              type: string
            # Free-form object: any additional properties are accepted.
            parameters:
              type: object
              additionalProperties: true
          required:
            - name
      required:
        - type
        - function

    # Request body for POST /chat/completions.
    # Only `model` and `messages` are required.
    ChatCompletionRequest:
      type: object
      required:
        - model
        - messages
      properties:
        messages:
          type: array
          description: A list of messages comprising the conversation so far.
          items:
            type: object
            properties:
              role:
                type: string
                description: "The role of the message's author. One of: system, user, assistant, or tool."
                enum:
                  - system
                  - user
                  - assistant
                  - tool
              content:
                description: The content of the message, which can either be a simple string or a structured format.
                # No fixed `type` at this level: a `type: string` here would
                # reject the array branch of the `oneOf` below.
                oneOf:
                  - type: string
                    description: A plain text message.
                  - type: array
                    description: A structured message with mixed content types.
                    items:
                      type: object
                      # Each part is either a text part or an image_url part.
                      oneOf:
                        - type: object
                          properties:
                            type:
                              type: string
                              enum:
                                - text
                            text:
                              type: string
                          required:
                            - type
                            - text
                        - type: object
                          properties:
                            type:
                              type: string
                              enum:
                                - image_url
                            image_url:
                              type: object
                              properties:
                                url:
                                  type: string
                                  description: The URL of the image as a plain string.
                              required:
                                - url
                          required:
                            - type
                            - image_url
            required:
              - role
              - content
        model:
          description: >
            The name of the model to query.<br>
            <br>
            [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#chat-models)
          example: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
          # Known model names are listed for tooling; the second branch
          # accepts any other string.
          anyOf:
            - type: string
              enum:
                - Qwen/Qwen2.5-72B-Instruct-Turbo
                - Qwen/Qwen2.5-7B-Instruct-Turbo
                - meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
                - meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
                - meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
            - type: string
        max_tokens:
          type: integer
          description: The maximum number of tokens to generate.
        stop:
          type: array
          description: A list of string sequences that will truncate (stop) inference text output. For example, "</s>" will stop generation as soon as the model generates the given token.
          items:
            type: string
        temperature:
          type: number
          description: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate for question answering or summarization. A value closer to 1 introduces more randomness in the output.
          format: float
        top_p:
          type: number
          description: A percentage (also called the nucleus parameter) that's used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities. It specifies a probability threshold below which all less likely tokens are filtered out. This technique helps maintain diversity and generate more fluent and natural-sounding text.
          format: float
        top_k:
          type: integer
          description: An integer that's used to limit the number of choices for the next predicted word or token. It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence. This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options.
          format: int32
        repetition_penalty:
          type: number
          description: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
          format: float
        stream:
          type: boolean
          description: "If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results."
        logprobs:
          type: integer
          minimum: 0
          maximum: 1
          description: Determines the number of most likely tokens to return at each token position log probabilities to return.
        echo:
          type: boolean
          description: If true, the response will contain the prompt. Can be used with `logprobs` to return prompt logprobs.
        n:
          type: integer
          description: The number of completions to generate for each prompt.
          minimum: 1
          maximum: 128
        min_p:
          type: number
          description: A number between 0 and 1 that can be used as an alternative to top_p and top-k.
          format: float
        presence_penalty:
          type: number
          description: A number between -2.0 and 2.0 where a positive value increases the likelihood of a model talking about new topics.
          format: float
        frequency_penalty:
          type: number
          description: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned.
          format: float
        logit_bias:
          type: object
          additionalProperties:
            type: number
            format: float
          description: Adjusts the likelihood of specific tokens appearing in the generated output.
          example: { "1024": -10.5, "105": 21.4 }
        seed:
          type: integer
          description: Seed value for reproducibility.
          example: 42
        # Either one of the strings `none` / `auto`, or an object naming a function.
        function_call:
          oneOf:
            - type: string
              enum: [none, auto]
            - type: object
              required: [name]
              properties:
                name:
                  type: string
        response_format:
          type: object
          description: An object specifying the format that the model must output.
          properties:
            type:
              type: string
              description: The type of the response format.
              example: json
            # NOTE(review): values are restricted to strings here; confirm
            # whether nested (non-string) schema values should be accepted.
            schema:
              type: object
              additionalProperties:
                type: string
              description: The schema of the response format.
        tools:
          type: array
          description: A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for.
          items:
            $ref: "#/components/schemas/ToolsPart"
        tool_choice:
          description: Controls which (if any) function is called by the model. By default uses `auto`, which lets the model pick between generating a message or calling a function.
          oneOf:
            - type: string
              example: "tool_name"
            - $ref: "#/components/schemas/ToolChoice"
        safety_model:
          type: string
          description: The name of the moderation model used to validate tokens. Choose from the available moderation models found [here](https://docs.together.ai/docs/inference-models#moderation-models).
          example: "safety_model_name"

    # Union of the per-role message parameter schemas defined below.
    ChatCompletionMessageParam:
      oneOf:
        - $ref: '#/components/schemas/ChatCompletionSystemMessageParam'
        - $ref: '#/components/schemas/ChatCompletionUserMessageParam'
        - $ref: '#/components/schemas/ChatCompletionAssistantMessageParam'
        - $ref: '#/components/schemas/ChatCompletionToolMessageParam'
        - $ref: '#/components/schemas/ChatCompletionFunctionMessageParam'

    # Start Message Params

    # Message with role "system"; `content` and `role` are required,
    # `name` is optional.
    ChatCompletionSystemMessageParam:
      type: object
      properties:
        content:
          type: string
        role:
          type: string
          enum:
            - "system"
        name:
          type: string
      required:
        - content
        - role

    # Message with role "user"; `content` and `role` are required,
    # `name` is optional.
    ChatCompletionUserMessageParam:
      type: object
      properties:
        # TODO: support more complex content?
        content:
          type: string
        role:
          type: string
          enum:
            - "user"
        name:
          type: string
      required:
        - content
        - role

    # Message with role "assistant"; only `role` is required.
    ChatCompletionAssistantMessageParam:
      type: object
      required: [role]
      properties:
        # May be null. Expressed as a type array because the 3.0-only
        # `nullable` keyword is not part of OpenAPI 3.1.
        content:
          type: ["string", "null"]
        role:
          type: string
          enum: ["assistant"]
        name:
          type: string
        tool_calls:
          type: array
          items:
            $ref: "#/components/schemas/ToolChoice"
        # Deprecated; when present both `arguments` and `name` are required.
        function_call:
          type: object
          deprecated: true
          properties:
            arguments:
              type: string
            name:
              type: string
          required: [arguments, name]

    # Deprecated message with role "function"; all three fields are required.
    ChatCompletionFunctionMessageParam:
      type: object
      deprecated: true
      properties:
        role:
          type: string
          enum:
            - "function"
        content:
          type: string
        name:
          type: string
      required:
        - content
        - role
        - name

    # Message with role "tool"; `role`, `content` and `tool_call_id`
    # are all required.
    ChatCompletionToolMessageParam:
      type: object
      required:
        - role
        - content
        - tool_call_id
      properties:
        role:
          type: string
          enum:
            - "tool"
        content:
          type: string
        tool_call_id:
          type: string

    # End Message Params

    # JSON body of a successful POST /chat/completions response.
    # `usage` is the only optional field.
    ChatCompletionResponse:
      type: object
      required:
        - choices
        - id
        - created
        - model
        - object
      properties:
        id:
          type: string
        choices:
          $ref: '#/components/schemas/ChatCompletionChoicesData'
        usage:
          $ref: '#/components/schemas/UsageData'
        created:
          type: integer
        model:
          type: string
        # Constant marker: the only allowed value is `chat.completion`.
        object:
          type: string
          enum: [chat.completion]

    # text/event-stream item for POST /chat/completions: either a chat
    # completion event or the stream sentinel.
    ChatCompletionStream:
      oneOf:
        - $ref: '#/components/schemas/ChatCompletionEvent'
        - $ref: '#/components/schemas/StreamSentinel'

    # Envelope for a streamed chat completion chunk; the chunk sits under `data`.
    ChatCompletionEvent:
      type: object
      properties:
        data:
          $ref: '#/components/schemas/ChatCompletionChunk'
      required:
        - data

    # A single streamed chat completion chunk.
    # Nullable values use OpenAPI 3.1 forms (type arrays, or `anyOf` with
    # `type: "null"` next to a `$ref`); the 3.0-only `nullable` keyword is
    # not part of OpenAPI 3.1.
    ChatCompletionChunk:
      type: object
      required: [id, object, created, choices, model]
      properties:
        id:
          type: string
        # Constant marker: the only allowed value is `chat.completion.chunk`.
        object:
          type: string
          enum:
            - chat.completion.chunk
        created:
          type: integer
        system_fingerprint:
          type: string
        model:
          type: string
          example: mistralai/Mixtral-8x7B-Instruct-v0.1
        choices:
          title: ChatCompletionChoices
          type: array
          items:
            type: object
            required: [index, delta, finish_reason]
            properties:
              index:
                type: integer
              # Required key whose value may be null.
              finish_reason:
                anyOf:
                  - $ref: "#/components/schemas/FinishReason"
                  - type: "null"
              logprobs:
                type: ["number", "null"]
              seed:
                type: ["integer", "null"]
              delta:
                title: ChatCompletionChoiceDelta
                type: object
                required: [role]
                properties:
                  token_id:
                    type: integer
                  role:
                    type: string
                    enum: ["system", "user", "assistant", "function", "tool"]
                  content:
                    type: ["string", "null"]
                  tool_calls:
                    type: array
                    items:
                      $ref: "#/components/schemas/ToolChoice"
                  # Deprecated; may be null. When it is an object, both
                  # `arguments` and `name` are required.
                  function_call:
                    type: ["object", "null"]
                    deprecated: true
                    properties:
                      arguments:
                        type: string
                      name:
                        type: string
                    required:
                      - arguments
                      - name
        # Optional; may be null.
        usage:
          anyOf:
            - $ref: "#/components/schemas/UsageData"
            - type: "null"

    # Terminal stream marker: `data` must be the literal string "[DONE]".
    StreamSentinel:
      type: object
      properties:
        data:
          title: stream_signal
          type: string
          enum: ["[DONE]"]
      required:
        - data

    # Token record: numeric id, text, float log probability and a
    # `special` flag. All four fields are required.
    ChatCompletionToken:
      type: object
      properties:
        id:
          type: integer
        text:
          type: string
        logprob:
          type: number
          format: float
        special:
          type: boolean
      required:
        - id
        - text
        - logprob
        - special

    # Choice object carrying a `delta` payload.
    # Nullable values are written as OpenAPI 3.1 type arrays; the 3.0-only
    # `nullable` keyword is not part of OpenAPI 3.1.
    ChatCompletionChoice:
      type: object
      required: [index, delta, finish_reason]
      properties:
        index:
          type: integer
        finish_reason:
          $ref: "#/components/schemas/FinishReason"
        logprobs:
          $ref: "#/components/schemas/LogprobsPart"
        delta:
          title: ChatCompletionChoiceDelta
          type: object
          required: [role]
          properties:
            token_id:
              type: integer
            role:
              type: string
              enum: ["system", "user", "assistant", "function", "tool"]
            content:
              type: ["string", "null"]
            tool_calls:
              type: array
              items:
                $ref: "#/components/schemas/ToolChoice"
            # Deprecated; may be null. When it is an object, both
            # `arguments` and `name` are required.
            function_call:
              type: ["object", "null"]
              deprecated: true
              properties:
                arguments:
                  type: string
                name:
                  type: string
              required:
                - arguments
                - name

    # Embeddings request: `model` plus `input`, both required.
    EmbeddingsRequest:
      type: object
      properties:
        model:
          type: string
          description: >
            The name of the embedding model to use.<br>
            <br>
            [See all of Together AI's embedding models](https://docs.together.ai/docs/serverless-models#embedding-models)
          example: togethercomputer/m2-bert-80M-8k-retrieval
          # Known model names are listed for tooling; the second branch
          # accepts any other string.
          anyOf:
            - type: string
              enum:
                - WhereIsAI/UAE-Large-V1
                - BAAI/bge-large-en-v1.5
                - BAAI/bge-base-en-v1.5
                - togethercomputer/m2-bert-80M-8k-retrieval
            - type: string
        # Either a single string or an array of strings.
        input:
          oneOf:
            - type: string
              description: A string providing the text for the model to embed.
              example: "Our solar system orbits the Milky Way galaxy at about 515,000 mph"
            - type: array
              items:
                type: string
                description: A string providing the text for the model to embed.
                example: "Our solar system orbits the Milky Way galaxy at about 515,000 mph"
          example: "Our solar system orbits the Milky Way galaxy at about 515,000 mph"
      required: [model, input]

    # Embeddings result: a `list` object holding one entry per embedding.
    EmbeddingsResponse:
      type: object
      required: [object, model, data]
      properties:
        object:
          type: string
          enum: [list]
        model:
          type: string
        data:
          type: array
          items:
            type: object
            properties:
              object:
                type: string
                enum: [embedding]
              # The vector itself, as an array of numbers.
              embedding:
                type: array
                items:
                  type: number
              index:
                type: integer
            required:
              - index
              - object
              - embedding

    # Array of ModelInfo entries.
    ModelInfoList:
      type: array
      items:
        $ref: '#/components/schemas/ModelInfo'
    # Description of a single model. `id`, `object`, `created` and `type`
    # are required; the remaining fields are optional metadata.
    ModelInfo:
      type: object
      required: [id, object, created, type]
      properties:
        id:
          type: string
          example: "Austism/chronos-hermes-13b"
        object:
          type: string
          example: "model"
        created:
          type: integer
          example: 1692896905
        # Model category. Declared as a string so the enum is explicitly
        # typed, like the other string enums in this file.
        type:
          type: string
          enum:
            - chat
            - language
            - code
            - image
            - embedding
            - moderation
            - rerank
          example: "chat"
        display_name:
          type: string
          example: "Chronos Hermes (13B)"
        organization:
          type: string
          example: "Austism"
        link:
          type: string
        license:
          type: string
          example: "other"
        context_length:
          type: integer
          example: 2048
        pricing:
          $ref: "#/components/schemas/Pricing"
    # Image result: a `list` object whose `data` entries each carry an
    # `index` plus exactly one of `b64_json` or `url`.
    ImageResponse:
      type: object
      properties:
        id:
          type: string
        model:
          type: string
        # Declared as a string so the single-value enum is explicitly typed,
        # like the other string enums in this file.
        object:
          type: string
          enum:
            - list
          example: "list"
        data:
          type: array
          items:
            type: object
            properties:
              index:
                type: integer
              b64_json:
                type: string
              url:
                type: string
            required:
              - index
            # Exactly one of the two payload fields must be present.
            oneOf:
              - required:
                  - b64_json
              - required:
                  - url
      required:
        - id
        - model
        - object
        - data
    # Pricing record made of five required numeric fields.
    Pricing:
      type: object
      properties:
        hourly:
          type: number
          example: 0
        input:
          type: number
          example: 0.3
        output:
          type: number
          example: 0.3
        base:
          type: number
          example: 0
        finetune:
          type: number
          example: 0
      required:
        - hourly
        - input
        - output
        - base
        - finetune

    # Item schema of the `tools` array in ChatCompletionRequest.
    # No field is marked required.
    ToolsPart:
      type: object
      properties:
        type:
          type: string
          example: 'tool_type'
        function:
          type: object
          properties:
            description:
              type: string
              example: 'A description of the function.'
            name:
              type: string
              example: 'function_name'
            # Free-form object: any additional properties are accepted.
            parameters:
              type: object
              description: 'A map of parameter names to their values.'
              additionalProperties: true
    # A function tool call: `id`, `type`, `function` and `index` are required.
    ToolChoice:
      type: object
      required:
        - id
        - type
        - function
        - index
      properties:
        # TODO: is this the right place for index?
        index:
          type: number
        id:
          type: string
        type:
          type: string
          enum:
            - "function"
        function:
          type: object
          required:
            - name
            - arguments
          properties:
            name:
              type: string
              example: 'function_name'
            arguments:
              type: string

    # Metadata record for a file. Every listed property is required.
    # Note the mixed key casing (`FileType`, `Processed`, `LineCount`).
    FileResponse:
      type: object
      required:
        - id
        - object
        - created_at
        - filename
        - bytes
        - purpose
        - FileType
        - Processed
        - LineCount
      properties:
        id:
          type: string
        object:
          type: string
          example: "file"
        created_at:
          type: integer
          example: 1715021438
        filename:
          type: string
          example: "my_file.jsonl"
        bytes:
          type: integer
          example: 2664
        # Declared as a string so the enum is explicitly typed, like the
        # other string enums in this file.
        purpose:
          type: string
          enum:
            - fine-tune
          example: "fine-tune"
        Processed:
          type: boolean
        # Declared as a string for the same reason as `purpose`.
        FileType:
          type: string
          enum:
            - jsonl
            - parquet
          example: "jsonl"
        LineCount:
          type: integer
    # Wrapper object whose required `data` array holds FileResponse items.
    FileList:
      type: object
      properties:
        data:
          type: array
          items:
            $ref: '#/components/schemas/FileResponse'
      required: [data]
    # File descriptor with four optional scalar fields.
    FileObject:
      type: object
      properties:
        object: { type: string }
        id: { type: string }
        filename: { type: string }
        size: { type: integer }
    # Deletion result: the file `id` and a boolean `deleted` flag
    # (neither is marked required).
    FileDeleteResponse:
      type: object
      properties:
        id: { type: string }
        deleted: { type: boolean }
    # Fine-tuning job record. Only `id` and `status` are required; every
    # other field is optional.
    FinetuneResponse:
      type: object
      required: [id, status]
      properties:
        id:
          type: string
          format: uuid
        training_file:
          type: string
        validation_file:
          type: string
        model:
          type: string
        model_output_name:
          type: string
        model_output_path:
          type: string
        trainingfile_numlines:
          type: integer
        trainingfile_size:
          type: integer
        # Timestamps are typed as plain strings (no `format` is declared).
        created_at:
          type: string
        updated_at:
          type: string
        n_epochs:
          type: integer
        n_checkpoints:
          type: integer
        n_evals:
          type: integer
        batch_size:
          type: integer
        learning_rate:
          type: number
        lr_scheduler:
          type: object
          $ref: '#/components/schemas/LRScheduler'
        warmup_ratio:
          type: number
        max_grad_norm:
          type: number
          format: float
        weight_decay:
          type: number
          format: float
        eval_steps:
          type: integer
        # Either a boolean or the literal string `auto` (the default).
        train_on_inputs:
          oneOf:
            - type: boolean
            - type: string
              enum: [auto]
          default: auto
        # Exactly one of the two training-type schemas.
        training_type:
          type: object
          oneOf:
            - $ref: '#/components/schemas/FullTrainingType'
            - $ref: '#/components/schemas/LoRATrainingType'
        status:
          $ref: '#/components/schemas/FinetuneJobStatus'
        job_id:
          type: string
        events:
          type: array
          items:
            $ref: '#/components/schemas/FinetuneEvent'
        token_count:
          type: integer
        param_count:
          type: integer
        total_price:
          type: integer
        epochs_completed:
          type: integer
        queue_depth:
          type: integer
        wandb_project_name:
          type: string
        wandb_url:
          type: string

    # Allowed values for the `status` field of a fine-tuning job.
    FinetuneJobStatus:
      type: string
      enum:
        [pending, queued, running, compressing, uploading, cancel_requested, cancelled, error, completed]

    # Event entry embedded in FinetuneResponse.events. No field is required.
    FinetuneEvent:
      type: object
      properties:
        # Constant marker: the only allowed value is `FinetuneEvent`.
        object:
          type: string
          enum: ['FinetuneEvent']
        created_at:
          type: string
        level:
          anyOf:
            - $ref: '#/components/schemas/FinetuneEventLevels'
        message:
          type: string
        type:
          $ref: '#/components/schemas/FinetuneEventType'
        param_count:
          type: integer
        token_count:
          type: integer
        wandb_url:
          type: string
        hash:
          type: string
    # Severity level of a fine-tune event; may be null.
    # The type includes "null" because the enum lists a null member, which a
    # plain `type: string` would never allow to validate.
    FinetuneEventLevels:
      type: ["string", "null"]
      enum:
        - null
        - info
        - warning
        - error
        - legacy_info
        - legacy_iwarning
        - legacy_ierror
    # Allowed values for the `type` field of a fine-tune event
    # (referenced by FinetuneEvent.type).
    FinetuneEventType:
      type: string
      enum:
        - job_pending
        - job_start
        - job_stopped
        - model_downloading
        - model_download_complete
        - training_data_downloading
        - training_data_download_complete
        - validation_data_downloading
        - validation_data_download_complete
        - wandb_init
        - training_start
        - checkpoint_save
        - billing_limit
        - epoch_complete
        - training_complete
        - model_compressing
        - model_compression_complete
        - model_uploading
        - model_upload_complete
        - job_complete
        - job_error
        - cancel_requested
        - job_restarted
        - refund
        - warning

    # Wrapper object whose required `data` array holds FinetuneResponse items.
    FinetuneList:
      type: object
      properties:
        data:
          type: array
          items:
            $ref: '#/components/schemas/FinetuneResponse'
      required: [data]
    # Wrapper object whose required `data` array holds FineTuneEvent items.
    FinetuneListEvents:
      type: object
      properties:
        data:
          type: array
          items:
            $ref: '#/components/schemas/FineTuneEvent'
      required: [data]
    # Event item returned inside FinetuneListEvents.data.
    # Every property except `level` is required.
    FineTuneEvent:
      type: object
      required:
        - object
        - created_at
        - message
        - type
        - param_count
        - token_count
        - total_steps
        - wandb_url
        - step
        - checkpoint_path
        - model_path
        - training_offset
        - hash
      properties:
        # Constant marker: the only allowed value is `fine-tune-event`.
        object:
          type: string
          enum: [fine-tune-event]
        created_at:
          type: string
        # May be null. The type includes "null" because the enum lists a null
        # member, which a plain `type: string` would never allow to validate.
        level:
          type: ["string", "null"]
          enum:
            - null
            - info
            - warning
            - error
            - legacy_info
            - legacy_iwarning
            - legacy_ierror
        message:
          type: string
        type:
          type: string
          enum:
            - job_pending
            - job_start
            - job_stopped
            - model_downloading
            - model_download_complete
            - training_data_downloading
            - training_data_download_complete
            - validation_data_downloading
            - validation_data_download_complete
            - wandb_init
            - training_start
            - checkpoint_save
            - billing_limit
            - epoch_complete
            - training_complete
            - model_compressing
            - model_compression_complete
            - model_uploading
            - model_upload_complete
            - job_complete
            - job_error
            - cancel_requested
            - job_restarted
            - refund
            - warning
        param_count:
          type: integer
        token_count:
          type: integer
        total_steps:
          type: integer
        wandb_url:
          type: string
        step:
          type: integer
        checkpoint_path:
          type: string
        model_path:
          type: string
        training_offset:
          type: integer
        hash:
          type: string

    # Result of a fine-tune download request. No field is marked required.
    FinetuneDownloadResult:
      type: object
      properties:
        # Untyped enum: the value is either null or the string `local`.
        object:
          enum: [null, local]
        id:
          type: string
        checkpoint_step:
          type: integer
        filename:
          type: string
        size:
          type: integer

    # Training-type variant whose required `type` is fixed to "Full".
    FullTrainingType:
      type: object
      required: [type]
      properties:
        type:
          type: string
          enum:
            - "Full"
    # Training-type variant whose `type` is fixed to "Lora".
    # `type`, `lora_r` and `lora_alpha` are required; the remaining fields
    # carry defaults.
    LoRATrainingType:
      type: object
      required: [type, lora_r, lora_alpha]
      properties:
        type:
          type: string
          enum:
            - "Lora"
        lora_r:
          type: integer
        lora_alpha:
          type: integer
        lora_dropout:
          type: number
          format: float
          default: 0.0
        lora_trainable_modules:
          type: string
          default: 'all-linear'
    # Learning-rate scheduler settings; only `lr_scheduler_type` is required.
    LRScheduler:
      type: object
      required: [lr_scheduler_type]
      properties:
        lr_scheduler_type:
          type: string
        lr_scheduler_args:
          type: object
          $ref: '#/components/schemas/LinearLRSchedulerArgs'
    LinearLRSchedulerArgs:
      type: object
      properties:
        min_lr_ratio:
          type: number
          format: float
          default: 0.0
          description: The ratio of the final learning rate to the peak learning rate