Skip to content

Commit

Permalink
fix code style
Browse files: browse the repository at this point in the history
  • Loading branch information
flyinglandlord committed Jan 7, 2025
1 parent 60e3d79 commit 283111b
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 18 deletions.
9 changes: 6 additions & 3 deletions lightllm/server/api_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,14 @@ def make_argument_parser() -> argparse.ArgumentParser:
parser.add_argument("--beam_mode", action="store_true", help="use beamsearch mode")
parser.add_argument("--diverse_mode", action="store_true", help="diversity generation mode")
parser.add_argument("--token_healing_mode", action="store_true", help="code model infer mode")
# parser.add_argument("--simple_constraint_mode", action="store_true", help="output constraint mode")
parser.add_argument("--output_constraint_mode", type=str,

parser.add_argument(
"--output_constraint_mode",
type=str,
choices=["outlines", "xgrammar", "none"],
default="none",
help="set the output constraint backend, none means no output constraint",)
help="set the output constraint backend, none means no output constraint",
)
parser.add_argument(
"--first_token_constraint_mode",
action="store_true",
Expand Down
6 changes: 4 additions & 2 deletions lightllm/server/router/model_infer/infer_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ def __init__(
# constraint states
self.regular_constraint = regular_constraint
self.guided_grammar = guided_grammar
self.fsm_current_state: int = 0
self.allowed_token_ids = allowed_token_ids

# Outlines constraint states
self.regex_guide = None
self.fsm_current_state: int = 0

# Xgrammar constraint states
self.xgrammar_compiled_grammar = None
Expand All @@ -84,7 +84,9 @@ def __init__(
return

def has_constraint_setting(self) -> bool:
return self.regular_constraint is not None or self.allowed_token_ids is not None or self.guided_grammar is not None
return (
self.regular_constraint is not None or self.allowed_token_ids is not None or self.guided_grammar is not None
)


class InferReq:
Expand Down
5 changes: 4 additions & 1 deletion lightllm/server/router/model_infer/model_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ def exposed_init_model(self, kvargs):
if kvargs.get("args", None) is not None:
is_simple_constraint_mode = kvargs.get("args", None).output_constraint_mode == "outlines"
is_xgrammar_constraint_mode = kvargs.get("args", None).output_constraint_mode == "xgrammar"
assert not (is_simple_constraint_mode and is_xgrammar_constraint_mode), "only one constraint mode can be true"
assert not (
is_simple_constraint_mode and is_xgrammar_constraint_mode
), "only one constraint mode can be true"
is_prefill_node = kvargs.get("args", None).run_mode == "prefill"
is_decode_node = kvargs.get("args", None).run_mode == "decode"
else:
Expand Down Expand Up @@ -77,6 +79,7 @@ def exposed_init_model(self, kvargs):
elif is_simple_constraint_mode:
self.backend = SimpleConstraintBackend()
elif is_xgrammar_constraint_mode:
# Reached only when simple_constraint_mode (Outlines) is off: Outlines is checked first and takes priority over xgrammar.
self.backend = XgrammarBackend()
elif is_first_token_constraint_mode:
self.backend = FirstTokenConstraintBackend()
Expand Down
14 changes: 2 additions & 12 deletions test/test_xgrammar_constraint.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,6 @@
import json
import threading

"""
python -m lightllm.server.api_server --model_dir /Meta-Llama-3-8B-Instruct \
--host 0.0.0.0 \
--port 8017 \
--tp 1 \
--max_total_token_num 100000 \
--simple_constraint_mode \
--use_dynamic_prompt_cache
"""


class RequestThread(threading.Thread):
def __init__(self, url, headers, data):
Expand Down Expand Up @@ -51,7 +41,7 @@ def run(self):
"inputs": "Introduce yourself in JSON briefly.",
# 'temperature': 0.1,
"parameters": {
"do_sample": False,
"do_sample": False,
"guided_grammar": json_grammar_ebnf_str,
"max_new_tokens": 200,
},
Expand All @@ -75,4 +65,4 @@ def run(self):
},
}
thread = RequestThread(url, headers, data)
thread.start()
thread.start()

0 comments on commit 283111b

Please sign in to comment.