From d1521b05b90793ecbf5dabeda04d2a2198e3e385 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Sun, 12 Jan 2025 02:23:16 +0000 Subject: [PATCH 1/4] add Signed-off-by: Roger Wang --- vllm/v1/engine/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py index 43419d2ff5381..6ee7cd9cc49d9 100644 --- a/vllm/v1/engine/processor.py +++ b/vllm/v1/engine/processor.py @@ -181,7 +181,7 @@ def process_inputs( return EngineCoreRequest( request_id=request_id, - prompt=decoder_inputs.prompt, + prompt=None, # core engine does not need original text prompt prompt_token_ids=decoder_inputs.prompt_token_ids, mm_inputs=sorted_mm_inputs, mm_hashes=sorted_mm_hashes, From 6f2f0ef68e53f1369edca5e1d093898e44f7951d Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Sun, 12 Jan 2025 04:30:52 +0000 Subject: [PATCH 2/4] update Signed-off-by: Roger Wang --- vllm/v1/engine/core_client.py | 6 ++++++ vllm/v1/engine/processor.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py index a4a45ae05ff9e..4ed7f944b058f 100644 --- a/vllm/v1/engine/core_client.py +++ b/vllm/v1/engine/core_client.py @@ -219,6 +219,9 @@ def _send_input(self, request_type: EngineCoreRequestType, self.input_socket.send_multipart(msg, copy=False) def add_request(self, request: EngineCoreRequest) -> None: + # NOTE: text prompt is not needed in the core engine as it has been + # tokenized. + request.prompt = None self._send_input(EngineCoreRequestType.ADD, request) def abort_requests(self, request_ids: List[str]) -> None: @@ -257,6 +260,9 @@ async def _send_input(self, request_type: EngineCoreRequestType, await self.input_socket.send_multipart(msg, copy=False) async def add_request_async(self, request: EngineCoreRequest) -> None: + # NOTE: text prompt is not needed in the core engine as it has been + # tokenized. + request.prompt = None await self._send_input(EngineCoreRequestType.ADD, request) async def abort_requests_async(self, request_ids: List[str]) -> None: diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py index 6ee7cd9cc49d9..43419d2ff5381 100644 --- a/vllm/v1/engine/processor.py +++ b/vllm/v1/engine/processor.py @@ -181,7 +181,7 @@ def process_inputs( return EngineCoreRequest( request_id=request_id, - prompt=None, # core engine does not need original text prompt + prompt=decoder_inputs.prompt, prompt_token_ids=decoder_inputs.prompt_token_ids, mm_inputs=sorted_mm_inputs, mm_hashes=sorted_mm_hashes, From f58080586f8dfb912cf98f1cc574a5013eb19a79 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Sun, 12 Jan 2025 04:37:01 +0000 Subject: [PATCH 3/4] update comment Signed-off-by: Roger Wang --- vllm/v1/engine/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/v1/engine/__init__.py b/vllm/v1/engine/__init__.py index 5e3c5e327ef63..2becd14cd63cc 100644 --- a/vllm/v1/engine/__init__.py +++ b/vllm/v1/engine/__init__.py @@ -19,8 +19,8 @@ class EngineCoreRequest: # due to circular imports and typing we have in data.py request_id: str - #NOTE(Nick): I don't think we need to pass prompt here since it should - # always be tokenized? + # NOTE(ywang96): original text prompt is needed when a request is added to + # Detokenizer, but set to None when it is added to EngineCoreClient. prompt: Optional[str] prompt_token_ids: List[int] mm_inputs: Optional[List[Optional["MultiModalKwargs"]]] From 7f86837b7288094f696530c18a43039576a84f97 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Sun, 12 Jan 2025 04:52:04 +0000 Subject: [PATCH 4/4] yapf Signed-off-by: Roger Wang --- vllm/v1/engine/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/engine/__init__.py b/vllm/v1/engine/__init__.py index 2becd14cd63cc..3ce9db0e47eed 100644 --- a/vllm/v1/engine/__init__.py +++ b/vllm/v1/engine/__init__.py @@ -19,7 +19,7 @@ class EngineCoreRequest: # due to circular imports and typing we have in data.py request_id: str - # NOTE(ywang96): original text prompt is needed when a request is added to + # NOTE(ywang96): original text prompt is needed when a request is added to # Detokenizer, but set to None when it is added to EngineCoreClient. prompt: Optional[str] prompt_token_ids: List[int]