config.example.yaml

# ---- llm ----
model: openai:gpt-4o             # Specify the LLM to use
temperature: null                # Set default temperature parameter (0, 1)
top_p: null                      # Set default top-p parameter, with a range of (0, 1) or (0, 2) depending on the model

# ---- behavior ----
stream: true                     # Controls whether to use the stream-style API.
save: true                       # Indicates whether to persist the message
keybindings: emacs               # Choose keybinding style (emacs, vi)
editor: null                     # Specifies the command used to edit input buffer or session. (e.g. vim, emacs, nano).
wrap: no                         # Controls text wrapping (no, auto, <max-width>)
wrap_code: false                 # Enables or disables wrapping of code blocks

# ---- function-calling ----
# Visit https://github.com/sigoden/llm-functions for setup instructions
function_calling: true           # Enables or disables function calling (Globally).
mapping_tools:                   # Alias for a tool or toolset
  fs: 'fs_cat,fs_ls,fs_mkdir,fs_rm,fs_write'
use_tools: null                  # Which tools to use by default. (e.g. 'fs,web_search')

# ---- prelude ----
prelude: null                    # Set a default role or session to start with (e.g. role:<name>, session:<name>, <session>:<role>)
repl_prelude: null               # Overrides the `prelude` setting specifically for conversations started in REPL
agent_prelude: null              # Set a session to use when starting a agent. (e.g. temp, default)

# ---- session ----
# Controls the persistence of the session. if true, auto save; if false, not save; if null, asking the user
save_session: null
# Compress session when token count reaches or exceeds this threshold
compress_threshold: 4000
# Text prompt used for creating a concise summary of session message
summarize_prompt: 'Summarize the discussion briefly in 200 words or less to use as a prompt for future context.'
# Text prompt used for including the summary of the entire session
summary_prompt: 'This is a summary of the chat history as a recap: '

# ---- RAG ----
# See [RAG-Guide](https://github.com/sigoden/aichat/wiki/RAG-Guide) for more details.
rag_embedding_model: null                   # Specifies the embedding model to use
rag_reranker_model: null                    # Specifies the rerank model to use
rag_top_k: 5                                # Specifies the number of documents to retrieve
rag_chunk_size: null                        # Specifies the chunk size
rag_chunk_overlap: null                     # Specifies the chunk overlap
rag_min_score_vector_search: 0              # Specifies the minimum relevance score for vector-based searching
rag_min_score_keyword_search: 0             # Specifies the minimum relevance score for keyword-based searching
# Defines the query structure using variables like __CONTEXT__ and __INPUT__ to tailor searches to specific needs
rag_template: |
  Answer the query based on the context while respecting the rules. (user query, some textual context and rules, all inside xml tags)

  <context>
  __CONTEXT__
  </context>

  <rules>
  - If you don't know, just say so.
  - If you are not sure, ask for clarification.
  - Answer in the same language as the user query.
  - If the context appears unreadable or of poor quality, tell the user then answer as best as you can.
  - If the answer is not in the context but you think you know the answer, explain that to the user then answer with your own knowledge.
  - Answer directly and without using xml tags.
  </rules>

  <user_query>
  __INPUT__
  </user_query>

# Define document loaders to control how RAG and `.file`/`--file` load files of specific formats.
document_loaders:
  # You can add custom loaders using the following syntax:
  #   <file-extension>: <command-to-load-the-file>
  # Note: Use `$1` for input file and `$2` for output file. If `$2` is omitted, use stdout as output.
  pdf: 'pdftotext $1 -'                         # Load .pdf file, see https://poppler.freedesktop.org to set up pdftotext
  docx: 'pandoc --to plain $1'                  # Load .docx file, see https://pandoc.org to set up pandoc

# ---- apperence ----
highlight: true                  # Controls syntax highlighting
light_theme: false               # Activates a light color theme when true. env: AICHAT_LIGHT_THEME
# Custom REPL left/right prompts, see https://github.com/sigoden/aichat/wiki/Custom-REPL-Prompt for more details
left_prompt:
  '{color.green}{?session {?agent {agent}>}{session}{?role /}}{!session {?agent {agent}>}}{role}{?rag @{rag}}{color.cyan}{?session )}{!session >}{color.reset} '
right_prompt:
  '{color.purple}{?session {?consume_tokens {consume_tokens}({consume_percent}%)}{!consume_tokens {consume_tokens}}}{color.reset}'

# ---- misc ----
serve_addr: 127.0.0.1:8000                  # Default serve listening address 
user_agent: null                            # Set User-Agent HTTP header, use `auto` for aichat/<current-version>
save_shell_history: true                    # Whether to save shell execution command to the history file

# ---- clients ----
clients:
  # All clients have the following configuration:
  # - type: xxxx
  #   name: xxxx                                      # Only use it to distinguish clients with the same client type. Optional
  #   models:
  #     - name: xxxx                                  # Chat model
  #       max_input_tokens: 100000
  #       supports_vision: true
  #       supports_function_calling: true
  #     - name: xxxx                                  # Embedding model
  #       type: embedding
  #       max_input_tokens: 200000
  #       max_tokens_per_chunk: 2000
  #       default_chunk_size: 1500                        
  #       max_batch_size: 100
  #     - name: xxxx                                  # Reranker model
  #       type: reranker 
  #       max_input_tokens: 2048
  #   patch:                                          # Patch api
  #     chat_completions:                             # Api type, possible values: chat_completions, embeddings, and rerank
  #       <regex>:                                    # The regex to match model names, e.g. '.*' 'gpt-4o' 'gpt-4o|gpt-4-.*'
  #         url: ''                                   # Patch request url
  #         body:                                     # Patch request body
  #           <json>
  #         headers:                                  # Patch request headers
  #           <key>: <value>
  #   extra:
  #     proxy: socks5://127.0.0.1:1080                # Set proxy
  #     connect_timeout: 10                           # Set timeout in seconds for connect to api

  # See https://platform.openai.com/docs/quickstart
  - type: openai
    api_base: https://api.openai.com/v1               # Optional
    api_key: xxx
    organization_id: org-xxx                          # Optional

  # For any platform compatible with OpenAI's API
  - type: openai-compatible
    name: local
    api_base: http://localhost:8080/v1
    api_key: xxx                                      # Optional
    models:
      - name: llama3.1
        max_input_tokens: 128000
        supports_function_calling: true
      - name: jina-embeddings-v2-base-en
        type: embedding
        default_chunk_size: 1500
        max_batch_size: 100
      - name: jina-reranker-v2-base-multilingual
        type: reranker

  # See https://ai.google.dev/docs
  - type: gemini
    api_base: https://generativelanguage.googleapis.com/v1beta
    api_key: xxx
    patch:
      chat_completions:
        '.*':
          body:
            safetySettings:
              - category: HARM_CATEGORY_HARASSMENT
                threshold: BLOCK_NONE
              - category: HARM_CATEGORY_HATE_SPEECH
                threshold: BLOCK_NONE
              - category: HARM_CATEGORY_SEXUALLY_EXPLICIT
                threshold: BLOCK_NONE
              - category: HARM_CATEGORY_DANGEROUS_CONTENT
                threshold: BLOCK_NONE

  # See https://docs.anthropic.com/claude/reference/getting-started-with-the-api
  - type: claude
    api_base: https://api.anthropic.com/v1            # Optional
    api_key: xxx

  # See https://docs.mistral.ai/
  - type: openai-compatible
    name: mistral
    api_base: https://api.mistral.ai/v1
    api_key: xxx

  # See https://docs.x.ai/docs
  - type: openai-compatible
    name: xai
    api_base: https://api.x.ai/v1
    api_key: xxx

  # See https://docs.ai21.com/docs/quickstart
  - type: openai-compatible
    name: ai12
    api_base: https://api.ai21.com/studio/v1
    api_key: xxx

  # See https://docs.cohere.com/docs/the-cohere-platform
  - type: cohere
    api_base: https://api.cohere.ai/v2                # Optional
    api_key: xxx

  # See https://docs.perplexity.ai/docs/getting-started
  - type: openai-compatible
    name: perplexity
    api_base: https://api.perplexity.ai
    api_key: xxx

  # See https://console.groq.com/docs/quickstart
  - type: openai-compatible
    name: groq
    api_base: https://api.groq.com/openai/v1
    api_key: xxx

  # See https://github.com/jmorganca/ollama
  - type: openai-compatible
    name: ollama
    api_base: http://localhost:11434/v1

  # See https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart
  - type: azure-openai
    api_base: https://{RESOURCE}.openai.azure.com
    api_key: xxx
    models:
      - name: gpt-4o                                  # Model deployment name
        max_input_tokens: 128000
        supports_vision: true
        supports_function_calling: true

  # See https://cloud.google.com/vertex-ai
  - type: vertexai
    project_id: xxx
    location: xxx
    # Specifies a application-default-credentials (adc) file
    # Run `gcloud auth application-default login` to init the adc file
    # see https://cloud.google.com/docs/authentication/external/set-up-adc
    adc_file: <gcloud-config-dir>/application_default_credentials.json>  # Optional field
    patch:
      chat_completions:
        'gemini-.*':
          body:
            safetySettings:
              - category: HARM_CATEGORY_HARASSMENT
                threshold: BLOCK_ONLY_HIGH
              - category: HARM_CATEGORY_HATE_SPEECH
                threshold: BLOCK_ONLY_HIGH
              - category: HARM_CATEGORY_SEXUALLY_EXPLICIT
                threshold: BLOCK_ONLY_HIGH
              - category: HARM_CATEGORY_DANGEROUS_CONTENT
                threshold: BLOCK_ONLY_HIGH

  # See https://docs.aws.amazon.com/bedrock/latest/userguide/
  - type: bedrock
    access_key_id: xxx
    secret_access_key: xxx
    region: xxx

  # See https://developers.cloudflare.com/workers-ai/
  - type: openai-compatible
    name: cloudflare
    api_base: https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/v1
    api_key: xxx

  # See https://cloud.baidu.com/doc/WENXINWORKSHOP/index.html
  - type: ernie
    api_key: xxx
    secret_key: xxx

  # See https://dashscope.aliyun.com/
  - type: openai-compatible
    name: qianwen
    api_base: https://dashscope.aliyuncs.com/compatible-mode/v1
    api_key: xxx

  # See https://cloud.tencent.com/product/hunyuan
  - type: openai-compatible
    name: hunyuan
    api_base: https://api.hunyuan.cloud.tencent.com/v1
    api_key: xxx

  # See https://platform.moonshot.cn/docs/intro
  - type: openai-compatible
    name: moonshot
    api_base: https://api.moonshot.cn/v1
    api_key: xxx

  # See https://platform.deepseek.com/api-docs/
  - type: openai-compatible
    name: deepseek
    api_base: https://api.deepseek.com
    api_key: xxx

  # See https://open.bigmodel.cn/dev/howuse/introduction
  - type: openai-compatible
    name: zhipuai
    api_base: https://open.bigmodel.cn/api/paas/v4
    api_key: xxx

  # See https://platform.lingyiwanwu.com/docs
  - type: openai-compatible
    name: lingyiwanwu
    api_base: https://api.lingyiwanwu.com/v1
    api_key: xxx

  # See https://deepinfra.com/docs
  - type: openai-compatible
    name: deepinfra
    api_base: https://api.deepinfra.com/v1/openai
    api_key: xxx

  # See https://github.com/marketplace/models
  - type: openai-compatible
    name: github
    api_base: https://models.inference.ai.azure.com
    api_key: xxx

  # See https://readme.fireworks.ai/docs/quickstart
  - type: openai-compatible
    name: fireworks
    api_base: https://api.fireworks.ai/inference/v1
    api_key: xxx

  # See https://openrouter.ai/docs#quick-start
  - type: openai-compatible
    name: openrouter
    api_base: https://openrouter.ai/api/v1
    api_key: xxx

  # See https://docs.siliconflow.cn/docs/getting-started
  - type: openai-compatible
    name: siliconflow
    api_base: https://api.siliconflow.cn/v1
    api_key: xxx

  # See https://docs.together.ai/docs/quickstart
  - type: openai-compatible
    name: together
    api_base: https://api.together.xyz/v1
    api_key: xxx

  # ----- RAG dedicated -----

  # See https://jina.ai
  - type: openai-compatible
    name: jina
    api_base: https://api.jina.ai/v1
    api_key: xxx

  # See https://docs.voyageai.com/docs/introduction
  - type: openai-compatible
    name: voyageai
    api_base: https://api.voyageai.com/v1
    api_key: xxx