fix code style

ModelTC · Jan 7, 2025 · 283111b
1 parent 60e3d79
commit 283111b
Show file tree

Hide file tree

Showing 4 changed files with 16 additions and 18 deletions.
diff --git a/lightllm/server/api_cli.py b/lightllm/server/api_cli.py
@@ -149,11 +149,14 @@ def make_argument_parser() -> argparse.ArgumentParser:
     parser.add_argument("--beam_mode", action="store_true", help="use beamsearch mode")
     parser.add_argument("--diverse_mode", action="store_true", help="diversity generation mode")
     parser.add_argument("--token_healing_mode", action="store_true", help="code model infer mode")
-    # parser.add_argument("--simple_constraint_mode", action="store_true", help="output constraint mode")
-    parser.add_argument("--output_constraint_mode", type=str,
+
+    parser.add_argument(
+        "--output_constraint_mode",
+        type=str,
         choices=["outlines", "xgrammar", "none"],
         default="none",
-        help="set the output constraint backend, none means no output constraint",)
+        help="set the output constraint backend, none means no output constraint",
+    )
     parser.add_argument(
         "--first_token_constraint_mode",
         action="store_true",

diff --git a/lightllm/server/router/model_infer/infer_batch.py b/lightllm/server/router/model_infer/infer_batch.py
@@ -64,11 +64,11 @@ def __init__(
         # constraint states
         self.regular_constraint = regular_constraint
         self.guided_grammar = guided_grammar
-        self.fsm_current_state: int = 0
         self.allowed_token_ids = allowed_token_ids
 
         # Outlines constraint states
         self.regex_guide = None
+        self.fsm_current_state: int = 0
 
         # Xgrammar constraint states
         self.xgrammar_compiled_grammar = None
@@ -84,7 +84,9 @@ def __init__(
         return
 
     def has_constraint_setting(self) -> bool:
-        return self.regular_constraint is not None or self.allowed_token_ids is not None or self.guided_grammar is not None
+        return (
+            self.regular_constraint is not None or self.allowed_token_ids is not None or self.guided_grammar is not None
+        )
 
 
 class InferReq:

diff --git a/lightllm/server/router/model_infer/model_rpc.py b/lightllm/server/router/model_infer/model_rpc.py
@@ -49,7 +49,9 @@ def exposed_init_model(self, kvargs):
         if kvargs.get("args", None) is not None:
             is_simple_constraint_mode = kvargs.get("args", None).output_constraint_mode == "outlines"
             is_xgrammar_constraint_mode = kvargs.get("args", None).output_constraint_mode == "xgrammar"
-            assert not (is_simple_constraint_mode and is_xgrammar_constraint_mode), "only one constraint mode can be true"
+            assert not (
+                is_simple_constraint_mode and is_xgrammar_constraint_mode
+            ), "only one constraint mode can be true"
             is_prefill_node = kvargs.get("args", None).run_mode == "prefill"
             is_decode_node = kvargs.get("args", None).run_mode == "decode"
         else:
@@ -77,6 +79,7 @@ def exposed_init_model(self, kvargs):
         elif is_simple_constraint_mode:
             self.backend = SimpleConstraintBackend()
         elif is_xgrammar_constraint_mode:
+            # NOTE: simple_constraint_mode (Outlines) is checked first, so it takes priority over xgrammar
             self.backend = XgrammarBackend()
         elif is_first_token_constraint_mode:
             self.backend = FirstTokenConstraintBackend()

diff --git a/test/test_xgrammar_constraint.py b/test/test_xgrammar_constraint.py
@@ -3,16 +3,6 @@
 import json
 import threading
 
-"""
-python -m lightllm.server.api_server --model_dir /Meta-Llama-3-8B-Instruct  \
-                                     --host 0.0.0.0                 \
-                                     --port 8017                   \
-                                     --tp 1                         \
-                                     --max_total_token_num 100000 \
-                                     --simple_constraint_mode \
-                                     --use_dynamic_prompt_cache
-"""
-
 
 class RequestThread(threading.Thread):
     def __init__(self, url, headers, data):
@@ -51,7 +41,7 @@ def run(self):
         "inputs": "Introduce yourself in JSON briefly.",
         # 'temperature': 0.1,
         "parameters": {
-            "do_sample": False, 
+            "do_sample": False,
             "guided_grammar": json_grammar_ebnf_str,
             "max_new_tokens": 200,
         },
@@ -75,4 +65,4 @@ def run(self):
         },
     }
     thread = RequestThread(url, headers, data)
-    thread.start()
+    thread.start()