From 4cfb3c4fcf94197167238ae3a55042f8ea9a4247 Mon Sep 17 00:00:00 2001 From: Aaron <29749331+aarnphm@users.noreply.github.com> Date: Thu, 9 Nov 2023 11:43:40 -0500 Subject: [PATCH 1/3] feat(api_server): add options for users to fine tune prompt output Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> --- vllm/entrypoints/api_server.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/api_server.py b/vllm/entrypoints/api_server.py index 58ea2e2291255..abd3af83f0e0e 100644 --- a/vllm/entrypoints/api_server.py +++ b/vllm/entrypoints/api_server.py @@ -36,9 +36,14 @@ async def generate(request: Request) -> Response: async def stream_results() -> AsyncGenerator[bytes, None]: async for request_output in results_generator: prompt = request_output.prompt - text_outputs = [ - prompt + output.text for output in request_output.outputs - ] + if args.echo: + text_outputs = [ + prompt + output.text for output in request_output.outputs + ] + else: + text_outputs = [ + output.text for output in request_output.outputs + ] ret = {"text": text_outputs} yield (json.dumps(ret) + "\0").encode("utf-8") @@ -71,6 +76,11 @@ async def abort_request() -> None: parser = argparse.ArgumentParser() parser.add_argument("--host", type=str, default="localhost") parser.add_argument("--port", type=int, default=8000) + parser.add_argument("--echo", + type=bool, + action='store_true', + default=True, + help='Whether to add prompt into outputs') parser = AsyncEngineArgs.add_cli_args(parser) args = parser.parse_args() From 117a06221cfd143954509f22bcf4918546f3a328 Mon Sep 17 00:00:00 2001 From: Aaron <29749331+aarnphm@users.noreply.github.com> Date: Thu, 9 Nov 2023 11:45:30 -0500 Subject: [PATCH 2/3] fix(lint): update linter for consistent quote format Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> --- vllm/entrypoints/api_server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/vllm/entrypoints/api_server.py b/vllm/entrypoints/api_server.py index abd3af83f0e0e..24a4f15d1e6d3 100644 --- a/vllm/entrypoints/api_server.py +++ b/vllm/entrypoints/api_server.py @@ -78,9 +78,9 @@ async def abort_request() -> None: parser.add_argument("--port", type=int, default=8000) parser.add_argument("--echo", type=bool, - action='store_true', + action="store_true", default=True, - help='Whether to add prompt into outputs') + help="Whether to add prompt into outputs") parser = AsyncEngineArgs.add_cli_args(parser) args = parser.parse_args() From ff305fcc70cd62b1a6fcacc6685d196dce6d71eb Mon Sep 17 00:00:00 2001 From: Aaron <29749331+aarnphm@users.noreply.github.com> Date: Thu, 9 Nov 2023 11:47:37 -0500 Subject: [PATCH 3/3] chore: update implementation Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> --- vllm/entrypoints/api_server.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/vllm/entrypoints/api_server.py b/vllm/entrypoints/api_server.py index 24a4f15d1e6d3..c8804d92f286b 100644 --- a/vllm/entrypoints/api_server.py +++ b/vllm/entrypoints/api_server.py @@ -36,14 +36,10 @@ async def generate(request: Request) -> Response: async def stream_results() -> AsyncGenerator[bytes, None]: async for request_output in results_generator: prompt = request_output.prompt - if args.echo: - text_outputs = [ - prompt + output.text for output in request_output.outputs - ] - else: - text_outputs = [ - output.text for output in request_output.outputs - ] + text_outputs = [ + (prompt + output.text if args.echo else output.text) + for output in request_output.outputs + ] ret = {"text": text_outputs} yield (json.dumps(ret) + "\0").encode("utf-8")