Skip to content

Commit

Permalink
rename the configs and create folder
Browse files Browse the repository at this point in the history
Signed-off-by: root <root@peter-ubuntu-2204-x.cluster.local>
  • Loading branch information
panpan0000 authored and root committed Jan 3, 2025
1 parent 9aefca9 commit f7b10ad
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 11 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# Other
config.yaml
output/

# Byte-compiled / optimized / DLL files
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ optional arguments:
```

There are some example CONFIG files available:
- config-tgis.yaml: config file for GRPC request for TGI Server.
- config-openai.yaml: config file for OpenAI format API endpoints.
- example-configs/config-tgis.yaml: config file for GRPC request for TGI Server.
- config.yaml: config file for those OpenAI format API endpoints.

For more LLM protocol config formats, refer to ./plugins for more details.

Expand Down
14 changes: 8 additions & 6 deletions config-openai.yaml → config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,19 @@ dataset:
max_queries: 1000
min_input_tokens: 0
max_input_tokens: 1024
max_output_tokens: 256
max_output_tokens: 2560
max_sequence_tokens: 1024
load_options:
type: constant #Future options: loadgen, stair-step
concurrency: 2
concurrency: 2 # can also be a list [1,2,4]
duration: 20 # In seconds. May support duration strings such as "100s" or "10m" in the future.
plugin: "openai_plugin"
plugin_options:
streaming: False
host: "http://localhost:8000"
model_name: "facebook/opt-125m"
endpoint: "/v1/completions" # "/v1/chat/completions"
api_key: YOUR_API_KEY
use_tls: False # Use True if querying an SSL grpc endpoint over https
streaming: True
model_name: "gpt-4o-mini"
host: "http://route.to.host"
endpoint: "/v1/chat/completions" # or "/v1/completions"
extra_metadata:
replicas: 1
File renamed without changes.
11 changes: 9 additions & 2 deletions plugins/openai_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
plugin_options:
streaming: True/False
host: "http://127.0.0.1:5000/v1/completions"
api_key: sk-xxxxxx
model_name: "/mnt/model/"
endpoint: "/v1/completions" # "/v1/chat/completions"
"""
Expand Down Expand Up @@ -71,6 +72,8 @@ def _parse_args(self, args):

logger.debug("Model name: %s", self.model_name)

self.api_key = args.get("api_key")

self.api = args.get('api')

if not self.api:
Expand Down Expand Up @@ -104,7 +107,8 @@ def request_http(self, query: dict, user_id: int, test_end_time: float = 0):

result.start_time = time.time()

headers = {"Content-Type": "application/json"}
headers = {"Content-Type": "application/json",
"Authorization": "Bearer " + self.api_key}

request = {
"max_tokens": query["output_tokens"],
Expand All @@ -121,6 +125,7 @@ def request_http(self, query: dict, user_id: int, test_end_time: float = 0):
if self.model_name is not None:
request["model"] = self.model_name


# Merge request and defaults
data = self.request_defaults | request

Expand Down Expand Up @@ -179,7 +184,9 @@ def request_http(self, query: dict, user_id: int, test_end_time: float = 0):


def streaming_request_http(self, query: dict, user_id: int, test_end_time: float):
headers = {"Content-Type": "application/json"}

headers = {"Content-Type": "application/json",
"Authorization": "Bearer " + self.api_key}

request = {
"max_tokens": query["output_tokens"],
Expand Down

0 comments on commit f7b10ad

Please sign in to comment.