Skip to content

Commit

Permalink
rename the configs and create folder
Browse files Browse the repository at this point in the history
Signed-off-by: root <root@peter-ubuntu-2204-x.cluster.local>
  • Loading branch information
panpan0000 authored and root committed Jan 3, 2025
1 parent 9aefca9 commit f7b10ad
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 11 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# Other
config.yaml
output/

# Byte-compiled / optimized / DLL files
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ optional arguments:
```

There are some example CONFIG files available:
- config-tgis.yaml: config file for GRPC request for TGI Server.
- config-openai.yaml: config file for OpenAI format API endpoints.
- example-configs/config-tgis.yaml: config file for GRPC request for TGI Server.
- config.yaml: config file for those OpenAI format API endpoints.

For more LLM protocol config formats, refer to ./plugins for more details.

Expand Down
14 changes: 8 additions & 6 deletions config-openai.yaml → config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,19 @@ dataset:
max_queries: 1000
min_input_tokens: 0
max_input_tokens: 1024
max_output_tokens: 256
max_output_tokens: 2560
max_sequence_tokens: 1024
load_options:
type: constant #Future options: loadgen, stair-step
concurrency: 2
concurrency: 2 # can also be a list [1,2,4]
duration: 20 # In seconds. May support duration strings such as "100s" or "10m" in the future.
plugin: "openai_plugin"
plugin_options:
streaming: False
host: "http://localhost:8000"
model_name: "facebook/opt-125m"
endpoint: "/v1/completions" # "/v1/chat/completions"
api_key: YOUR_API_KEY
use_tls: False # Use True if querying an SSL grpc endpoint over https
streaming: True
model_name: "gpt-4o-mini"
host: "http://route.to.host"
endpoint: "/v1/chat/completions" # or "/v1/completions"
extra_metadata:
replicas: 1
File renamed without changes.
11 changes: 9 additions & 2 deletions plugins/openai_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
plugin_options:
streaming: True/False
host: "http://127.0.0.1:5000/v1/completions"
api_key: sk-xxxxxx
model_name: "/mnt/model/"
endpoint: "/v1/completions" # "/v1/chat/completions"
"""
Expand Down Expand Up @@ -71,6 +72,8 @@ def _parse_args(self, args):

logger.debug("Model name: %s", self.model_name)

self.api_key = args.get("api_key")

self.api = args.get('api')

if not self.api:
Expand Down Expand Up @@ -104,7 +107,8 @@ def request_http(self, query: dict, user_id: int, test_end_time: float = 0):

result.start_time = time.time()

headers = {"Content-Type": "application/json"}
headers = {"Content-Type": "application/json",
"Authorization": "Bearer " + self.api_key}

request = {
"max_tokens": query["output_tokens"],
Expand All @@ -121,6 +125,7 @@ def request_http(self, query: dict, user_id: int, test_end_time: float = 0):
if self.model_name is not None:
request["model"] = self.model_name


# Merge request and defaults
data = self.request_defaults | request

Expand Down Expand Up @@ -179,7 +184,9 @@ def request_http(self, query: dict, user_id: int, test_end_time: float = 0):


def streaming_request_http(self, query: dict, user_id: int, test_end_time: float):
headers = {"Content-Type": "application/json"}

headers = {"Content-Type": "application/json",
"Authorization": "Bearer " + self.api_key}

request = {
"max_tokens": query["output_tokens"],
Expand Down

0 comments on commit f7b10ad

Please sign in to comment.