-
Notifications
You must be signed in to change notification settings - Fork 291
/
openai-forward-config.yaml
92 lines (75 loc) · 1.7 KB
/
openai-forward-config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Logging toggles. The two flags are nested under `log` so they do not
# collide with the identically named flags under `cache`.
log:
  general: true
  openai: true
# Response-cache settings. Everything below is nested under `cache`.
cache:
  general: true
  openai: true
  # Routes listed here are the ones the cache applies to
  # (presumably matched against the request path — confirm with the consumer).
  routes:
    - "/v1/chat/completions"
    - "/v1/embeddings"
  # `CACHE_BACKEND`: Options (MEMORY, LMDB, LevelDB)
  backend: MEMORY
  # Storage location for the cache backend; presumably unused for MEMORY.
  root_path_or_url: "./FLAXKV_DB"
  default_request_caching_value: false
# Route treated as the chat-completion endpoint
# (presumably selects chat-specific handling — confirm with the consumer).
chat_completion_route: "/v1/chat/completions"
# Optional, disabled by default: uncomment to set a custom "general" route.
# custom_general_route: "/v1/models/gemini-pro"
# NOTE(review): benchmark mode is enabled here; it presumably relates to the
# "/benchmark/v1/chat/completions" route that appears in the token rate
# limits — confirm before using this file in production.
benchmark_mode: true
# Forwarding targets. Each list item is one mapping with three fields:
# `base_url` (upstream), `route` (local path prefix) and `type`.
forward:
  - base_url: "https://api.openai.com"
    route: "/"
    type: "openai"
  - base_url: "https://generativelanguage.googleapis.com"
    route: "/gemini"
    type: "general"
# Optional example (disabled): uncomment the lines below as a unit.
# custom_model_config:
#   backend: "ollama"
#   model_map:
#     gpt-3.5-turbo: "qwen2:7b"
#   api_base: "http://localhost:11434"
# API-key configuration: three nested maps under `api_key`.
# NOTE: the "sk-..." and "fk-..." values are placeholders. Replace them at
# deploy time and do not commit real secrets to version control.
api_key:
  # Level number -> list of model names
  # (presumably the models permitted at that level — confirm).
  level:
    1: ["gpt-4"]
    2: ["gpt-3.5-turbo"]
  # Upstream key -> list of level numbers associated with that key.
  openai_key:
    "sk-xxx1": [0]
    "sk-xxx2": [1]
    "sk-xxx3": [1, 2]
  # Level number -> list of forward keys assigned to that level.
  forward_key:
    0: ["fk-0"]
    1: ["fk-1", "fk-11"]
    2: ["fk-2"]
# Rate-limiting settings. Limit strings have the form "<count>/<period>";
# a ";" appears to separate several limits applied together — confirm
# against the rate-limit parser used by the consumer.
rate_limit:
  global_rate_limit: "200/minute"
  strategy: "moving-window"
  iter_chunk: "one-by-one"
  # Per-route request limits; each route carries a list of {level, limit}.
  req_rate_limit:
    - route: "/v1/chat/completions"
      value:
        - level: 0
          limit: "100/2minutes"
    - route: "/v1/completions"
      value:
        - level: 0
          limit: "60/minute;600/hour"
  # Storage URL for request-limit counters.
  req_rate_limit_backend: "redis://localhost:6379"
  # Per-route token limits; same {level, limit} shape as above.
  token_rate_limit:
    - route: "/v1/chat/completions"
      value:
        - level: 0
          limit: "60/second"
    - route: "/v1/completions"
      value:
        - level: 0
          limit: "60/second"
    - route: "/benchmark/v1/chat/completions"
      value:
        - level: 0
          limit: "20/second"
# Request timeout (unit not stated here; presumably seconds — confirm).
timeout: 6
# Unset values are written as explicit `null` rather than a bare `key:`.
# Both parse to null; the explicit form makes clear that no list is set.
ip_blacklist: null
ip_whitelist: null
webui_restart_port: 15555
webui_log_port: 15556
# No outbound proxy configured.
proxy: null
default_stream_response: true
tz: Asia/Shanghai