From c249b98aaae952e76577ff7b396a0808053339ed Mon Sep 17 00:00:00 2001
From: minghaipeng
Date: Wed, 8 Jan 2025 06:51:43 +0000
Subject: [PATCH] fix bug

---
 llm/server/server/http_server/api.py | 1 +
 llm/server/server/triton_server.py   | 4 +---
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/llm/server/server/http_server/api.py b/llm/server/server/http_server/api.py
index df9c066284..2e01ae039d 100644
--- a/llm/server/server/http_server/api.py
+++ b/llm/server/server/http_server/api.py
@@ -31,6 +31,7 @@ class Req(BaseModel):
     req_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
     input_ids: Optional[List[int]] = None
     text: Optional[str] = None
+    stop_sequences: Optional[List] = None
     messages: Optional[List] = None
     max_dec_len: Optional[int] = None
     seq_len: Optional[int] = None
diff --git a/llm/server/server/triton_server.py b/llm/server/server/triton_server.py
index ddd74c4b87..02be0b4e8a 100644
--- a/llm/server/server/triton_server.py
+++ b/llm/server/server/triton_server.py
@@ -102,9 +102,7 @@ def _cache_special_tokens(self, batch_result):
         for i in range(len(batch_result)):
             is_end = batch_result[i].get("is_end", 0)
             token_ids = batch_result[i]["token_ids"]
-            return_all_tokens = batch_result[i].get("return_all_tokens", False)
-            cache_special_token = False if is_end == 1 else True
-            if is_end != 1 and (cache_special_token or return_all_tokens or self.cfg.disable_streaming):
+            if is_end != 1:
                 if batch_result[i]["req_id"] not in self.token_buffer:
                     self.token_buffer[batch_result[i]["req_id"]] = list()
                     self.score_buffer[batch_result[i]["req_id"]] = list()
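
Usage note (editorial, not part of the patch): a minimal sketch of how the new stop_sequences field can be exercised, using a trimmed-down copy of the patched Req model built only from the fields visible in the hunk above. The sample values are illustrative, and the standalone class below is an assumption for demonstration, not the server's actual module.

# Trimmed-down copy of the patched Req model, for illustration only.
from typing import List, Optional
import uuid

from pydantic import BaseModel, Field

class Req(BaseModel):
    req_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    input_ids: Optional[List[int]] = None
    text: Optional[str] = None
    stop_sequences: Optional[List] = None  # new field added by this patch
    messages: Optional[List] = None
    max_dec_len: Optional[int] = None
    seq_len: Optional[int] = None

# A request payload can now carry stop sequences alongside the prompt.
req = Req(text="hello", stop_sequences=["</s>"], max_dec_len=32)
print(req.dict())  # serialized payload includes the new stop_sequences field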