Fix the server CLI issue with use_flashinfer flag (#42)

* fix refactor
* empty
* fix lint
mlsys-io · Jul 8, 2024 · 4e384cc · 4e384cc
1 parent cb9535a
commit 4e384cc
Show file tree

Hide file tree

Showing 3 changed files with 6 additions and 2 deletions.
diff --git a/server/text_generation_server/cli.py b/server/text_generation_server/cli.py
@@ -43,7 +43,7 @@ def serve(
     logger_level: str = "INFO",
     json_output: bool = False,
     otlp_endpoint: Optional[str] = None,
-    use_flashinfer: bool = True,
+    use_flashinfer: Optional[bool] = True,
 ):
     if sharded:
         assert (

diff --git a/server/text_generation_server/models_flashinfer/__init__.py b/server/text_generation_server/models_flashinfer/__init__.py
@@ -79,6 +79,11 @@ class ModelType(enum.Enum):
     }
 
 
+__GLOBALS = locals()
+for data in ModelType:
+    __GLOBALS[data.name] = data.value["type"]
+
+
 def get_model(
     model_id: str,
     revision: Optional[str],

diff --git a/server/text_generation_server/server_flashinfer.py b/server/text_generation_server/server_flashinfer.py
@@ -175,7 +175,6 @@ async def serve_inner(
                 revision,
                 sharded,
                 quantize,
-                speculate,
                 dtype,
                 trust_remote_code,
                 lora_ids,