example.config.yml

logging: False
#日志"
port: 17860
#webui 默认启动端口号"
library:
   strategy: "calc:2 rtst:2 agents:0"
   #库参数，每组参数间用空格分隔，冒号前为知识库类型，后为抽取数量。

   #知识库类型:
   #bing        cn.bing搜索，仅国内可用，目前处于服务降级状态
   #sogowx      sogo微信公众号搜索，可配合相应auto实现全文内容分析
   #fess        fess搜索引擎
   #rtst        支持实时生成的sentence_transformers
   #remote      调用远程闻达知识库，用于集群化部署
   #kg          知识图谱,暂未启用
   #特殊库：
   #mix         根据参数进行多知识库融合
   #agents      提供网络资源代理，没有知识库查找功能，所以数量为0
   #            （目前stable-diffusion的auto脚本需要使用其中功能，同时需开启stable-diffusion的api功能）

   count: 5
   #最大抽取数量（所有知识库总和）

   step: 2
   #知识库默认上下文步长
librarys:
   bing:
      count:
         5
         #最大抽取数量
   bingsite:
      count: 5
      #最大抽取数量
      site: "www.12371.cn"
      #搜索网站
   fess:
      #fess版本，默认采用14.8以上
      version: 14.8
      count: 1
      #最大抽取数量
      fess_host: "127.0.0.1:8080"
      #fess搜索引擎的部署地址
   remote:
      host:
         "http://127.0.0.1:17860/api/find"
         #远程知识库地址地址
   rtst:
      count: 3
      #最大抽取数量
      #   backend: Annoy
      size: 20
      #分块大小"
      overlap: 0
      #分块重叠长度
      model_path: "model/m3e-base"
      #向量模型存储路径
      device: cuda
      #embedding运行设备
   qdrant:
      count: 3
      #最大抽取数量
      size: 20
      #分块大小
      overlap: 0
      #分块重叠长度
      path: txt
      #知识库文本路径
      model_path: "model/m3e-base"
      #向量模型存储路径
      #qdrant_host: "http://localhost:6333"
      #qdrant服务地址
      qdrant_path: "memory/q"
      #qdrant本地目录   qdrant_path   qdrant_host  同时只能存在一个
      device: cpu
      #qdrant运行设备
      batch_size: 32
      #向量模型批处理大小
      collection: qa_collection
      #qdrant集合名称
      similarity_threshold: 0.8
      #相似度阈值
   kg:
      count: 5
      #最大抽取数量
      knowledge_path: ""
      #知识库的文件夹目录名称，若留空则为txt
      graph_host: ""
      #图数据库部署地址
      model_path: ""
      #信息抽取模型所在路径"
llm_type: rwkv
#llm模型类型:glm6b、rwkv、llama、replitcode等，详见相关文件
llm_models:
   rwkv:
      path: "model/RWKV-4-World-7B-v1-OnlyForTest_52%_trained-20230606-ctx4096-i8.pth" #rwkv模型位置"
      strategy: "cuda fp16i8"
      #   path: "model/rwkv_ggml_q8.bin"           #rwkv模型位置"
      #   strategy: "Q8_0"       #rwkvcpp:运行方式，设置strategy诸如"Q8_0->16"即可开启，代表运行Q8_0模型在16个cpu核心上
      #记得去https://github.com/saharNooby/rwkv.cpp/releases下最新版本文件替换librwkv.so或rwkv.dll
      #rwkv模型参数"

      historymode: state
      #rwkv历史记录实现方式：state、string。注意，对于torch实现，本参数已弃用，因为已经实现自动切换。
      state_source_device: cpu
      #torch实现下，会保存每个会话的state。每个占用2M显存，本参数控制将其复制到内存以节约显存。
      #置为cuda:0，代表state来自cuda:0，且需要将其复制到内存以节约显存。
      #置为cuda:1，同理。
      #置为cpu，代表不需要复制，如果使用cpu计算，或多卡计算需这么设置。
      presence_penalty: 0.2
      count_penalty: 0.2
   glm6b:
      path: "model\\chatglm2-6b-int4"
      #glm模型位置"
      strategy: "cuda fp16"
      #cuda fp16	 所有glm模型 要直接跑在gpu上都可以使用这个参数
      #cuda fp16i8	 fp16原生模型 要自行量化为int8跑在gpu上可以使用这个参数
      #cuda fp16i4	 fp16原生模型 要自行量化为int4跑在gpu上可以使用这个参数
      #cuda:0 fp16 *14 -> cuda:1	fp16 多卡流水线并行，使用方法参考RWKV的strategy介绍。总层数28
      #   lora: "model/lora-450"
      #glm-lora模型位置
   internlm:
      path: model\internlm-chat-7b-8k
      #模型位置"
   baichuan:
      # path: "model\\baichuan-7B"
      path: model\baichuan-vicuna-7B-GPTQ #https://huggingface.co/TheBloke/baichuan-vicuna-7B-GPTQ#chinese-model-card
      #模型名字中有gptq会进入gptq模式，不加载lora，且会加载basename中的模型
      basename: baichuan-vicuna-7b-GPTQ-4bit-128g.no-act.order
      #模型位置"
      lora: "model/64rank"
   aquila: #https://model.baai.ac.cn/model-detail/100101
      path: "model/AquilaChat-7B" 
      #模型位置"
      strategy: "cuda fp16"
   llama:
      path: "model/stable-vicuna-13B.ggml.q4_2.bin"
      #llama模型位置
      #   strategy: "Q8_0"       #cpp:运行方式，设置strategy诸如"Q8_0"即可开启
      strategy: "cuda fp16i8"
   moss:
      path: "model/moss-moon-003-sft-plugin-int4"
      #模型位置
      strategy: ""
      #模型参数 暂时不用"
   openai:
      #   api_host: "https://gpt.lucent.blog/v1" #网友的反代
      #   api_host: "https://api.openai.com/v1" #官方地址
      api_host: "http://127.0.0.1:8000/v1" #rwkv runner

   replitcode:
      path: "model\\replit-code-v1-3b"
      #replitcode模型位置
      #说明：目前模型参数和chat模型差异较大，写死了，不能通过wenda界面配置，需要调整自行到llm_replitcode.py 文件中调整，或放开wenda界面参数
      #y = model.generate(x, max_length=100, do_sample=true, top_p=0.95, top_k=4, temperature=0.2, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
      #模型地址：https://huggingface.co/replit/replit-code-v1-3b ，约10g
      #作用代码补全：问：def fibonacci(n):
      #答：def fibonacci(n):
      #if n == 0:
      #return 0
      #elif n == 1:
      #return 1
      #else:
      #return fibonacci(n-1) + fibonacci(n-2)
      #print(fibonacci(10))
      strategy: "cuda fp16"