Skip to content

Commit 760f49c

Browse files
lengrongfu and epwalsh
authored and committed
[bugfix] fix profile impact benchmark results (vllm-project#21507)
Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io>
1 parent fc5c0fd commit 760f49c

File tree

3 files changed

+41
-42
lines changed

3 files changed

+41
-42
lines changed

benchmarks/benchmark_serving.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -396,20 +396,6 @@ async def limited_request_func(request_func_input, pbar):
396396
tasks.append(asyncio.create_task(task))
397397
outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks)
398398

399-
if profile:
400-
print("Stopping profiler...")
401-
profile_input = RequestFuncInput(
402-
model=model_id,
403-
prompt=test_prompt,
404-
api_url=base_url + "/stop_profile",
405-
prompt_len=test_prompt_len,
406-
output_len=test_output_len,
407-
logprobs=logprobs,
408-
)
409-
profile_output = await request_func(request_func_input=profile_input)
410-
if profile_output.success:
411-
print("Profiler stopped")
412-
413399
if pbar is not None:
414400
pbar.close()
415401

@@ -518,6 +504,20 @@ def process_one_metric(
518504

519505
print("=" * 50)
520506

507+
if profile:
508+
print("Stopping profiler...")
509+
profile_input = RequestFuncInput(
510+
model=model_id,
511+
prompt=test_prompt,
512+
api_url=base_url + "/stop_profile",
513+
prompt_len=test_prompt_len,
514+
output_len=test_output_len,
515+
logprobs=logprobs,
516+
)
517+
profile_output = await request_func(request_func_input=profile_input)
518+
if profile_output.success:
519+
print("Profiler stopped")
520+
521521
return result
522522

523523

benchmarks/benchmark_serving_structured_output.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -538,20 +538,6 @@ async def limited_request_func(request_func_input, pbar):
538538
)
539539
outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks)
540540

541-
if profile:
542-
print("Stopping profiler...")
543-
profile_input = RequestFuncInput(
544-
model=model_id,
545-
prompt=test_request.prompt,
546-
api_url=base_url + "/stop_profile",
547-
prompt_len=test_request.prompt_len,
548-
output_len=test_request.expected_output_len,
549-
extra_body={test_request.structure_type: test_request.schema},
550-
)
551-
profile_output = await request_func(request_func_input=profile_input)
552-
if profile_output.success:
553-
print("Profiler stopped")
554-
555541
if pbar is not None:
556542
pbar.close()
557543

@@ -666,6 +652,20 @@ def process_one_metric(
666652

667653
print("=" * 50)
668654

655+
if profile:
656+
print("Stopping profiler...")
657+
profile_input = RequestFuncInput(
658+
model=model_id,
659+
prompt=test_request.prompt,
660+
api_url=base_url + "/stop_profile",
661+
prompt_len=test_request.prompt_len,
662+
output_len=test_request.expected_output_len,
663+
extra_body={test_request.structure_type: test_request.schema},
664+
)
665+
profile_output = await request_func(request_func_input=profile_input)
666+
if profile_output.success:
667+
print("Profiler stopped")
668+
669669
return result, ret
670670

671671

vllm/benchmarks/serve.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -470,20 +470,6 @@ async def limited_request_func(request_func_input, pbar):
470470
pbar=pbar)))
471471
outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks)
472472

473-
if profile:
474-
print("Stopping profiler...")
475-
profile_input = RequestFuncInput(
476-
model=model_id,
477-
prompt=test_prompt,
478-
api_url=base_url + "/stop_profile",
479-
prompt_len=test_prompt_len,
480-
output_len=test_output_len,
481-
logprobs=logprobs,
482-
)
483-
profile_output = await request_func(request_func_input=profile_input)
484-
if profile_output.success:
485-
print("Profiler stopped")
486-
487473
if pbar is not None:
488474
pbar.close()
489475

@@ -576,6 +562,19 @@ def process_one_metric(
576562

577563
print("=" * 50)
578564

565+
if profile:
566+
print("Stopping profiler...")
567+
profile_input = RequestFuncInput(
568+
model=model_id,
569+
prompt=test_prompt,
570+
api_url=base_url + "/stop_profile",
571+
prompt_len=test_prompt_len,
572+
output_len=test_output_len,
573+
logprobs=logprobs,
574+
)
575+
profile_output = await request_func(request_func_input=profile_input)
576+
if profile_output.success:
577+
print("Profiler stopped")
579578
return result
580579

581580

0 commit comments

Comments (0)