|
28 | 28 | cleanup_remaining_deployments, |
29 | 29 | ) |
30 | 30 | from utils.genai_perf import benchmark_decode, benchmark_prefill |
31 | | -from utils.plot import ( |
32 | | - plot_decode_performance, |
33 | | - plot_prefill_performance, |
34 | | -) |
| 31 | +from utils.plot import plot_decode_performance, plot_prefill_performance |
35 | 32 | from utils.profile_cache import ( |
36 | 33 | check_decode_results_exist, |
37 | 34 | check_prefill_results_exist, |
38 | 35 | load_existing_decode_results, |
39 | 36 | load_existing_prefill_results, |
40 | 37 | ) |
41 | | -from utils.profile_prefill import profile_prefill |
42 | 38 | from utils.profile_deocde import profile_decode |
| 39 | +from utils.profile_prefill import profile_prefill |
43 | 40 |
44 | 41 | logger = logging.getLogger(__name__) |
45 | 42 | logger.setLevel(logging.INFO) |
@@ -419,8 +416,12 @@ async def run_profile(args): |
419 | 416 | base_url = client.get_service_url() |
420 | 417 |
421 | 418 | profile_prefill( |
422 | | - work_dir, model_name, base_url, best_prefill_tp, |
423 | | - args.max_context_length, args.prefill_interpolation_granularity, |
| 419 | + work_dir, |
| 420 | + model_name, |
| 421 | + base_url, |
| 422 | + best_prefill_tp, |
| 423 | + args.max_context_length, |
| 424 | + args.prefill_interpolation_granularity, |
424 | 425 | ) |
425 | 426 |
426 | 427 | print("Cleaning up deployment...") |
@@ -468,8 +469,13 @@ async def run_profile(args): |
468 | 469 | base_url = client.get_service_url() |
469 | 470 |
470 | 471 | profile_decode( |
471 | | - work_dir, model_name, base_url, best_decode_tp, max_kv_tokens, |
472 | | - args.max_context_length, args.decode_interpolation_granularity |
| 472 | + work_dir, |
| 473 | + model_name, |
| 474 | + base_url, |
| 475 | + best_decode_tp, |
| 476 | + max_kv_tokens, |
| 477 | + args.max_context_length, |
| 478 | + args.decode_interpolation_granularity, |
473 | 479 | ) |
474 | 480 |
475 | 481 | print("Cleaning up deployment...") |