@@ -15,10 +15,10 @@
     "avg_latency": "Mean latency (ms)",
     # "P10": "P10 (s)",
     # "P25": "P25 (s)",
-    "P50": "Median",
+    "P50": "Median latency (ms)",
     # "P75": "P75 (s)",
     # "P90": "P90 (s)",
-    "P99": "P99",
+    "P99": "P99 latency (ms)",
 }

 # throughput tests and the keys that will be printed into markdown
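These mapping dicts control which DataFrame columns get published and what their markdown headers say; the change above spells out the metric and unit in every header instead of the bare "Median"/"P99". Below is a minimal sketch of the presumable usage pattern (select the mapped columns, rename, render). `latency_column_mapping` and `to_markdown_table` are hypothetical names; `latency_results` is the DataFrame this diff later serializes.

```python
# Sketch only -- not the script's actual code.
import pandas as pd

# Hypothetical name for the dict edited above (uncommented keys only).
latency_column_mapping = {
    "avg_latency": "Mean latency (ms)",
    "P50": "Median latency (ms)",
    "P99": "P99 latency (ms)",
}

def to_markdown_table(df: pd.DataFrame, mapping: dict) -> str:
    # Keep only the mapped columns that exist, then swap in the
    # human-readable headers before rendering (requires `tabulate`).
    cols = [c for c in mapping if c in df.columns]
    return df[cols].rename(columns=mapping).to_markdown(index=False)
```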
|
@@ -43,15 +43,14 @@
     # "input_throughput": "Input Tput (tok/s)",
     # "output_throughput": "Output Tput (tok/s)",
     "mean_ttft_ms": "Mean TTFT (ms)",
-    # do not say TTFT again to avoid the table getting too wide
-    "median_ttft_ms": "Median",
-    "p99_ttft_ms": "P99",
+    "median_ttft_ms": "Median TTFT (ms)",
+    "p99_ttft_ms": "P99 TTFT (ms)",
     # "mean_tpot_ms": "Mean TPOT (ms)",
     # "median_tpot_ms": "Median",
     # "p99_tpot_ms": "P99",
     "mean_itl_ms": "Mean ITL (ms)",
-    "median_itl_ms": "Median",
-    "p99_itl_ms": "P99",
+    "median_itl_ms": "Median ITL (ms)",
+    "p99_itl_ms": "P99 ITL (ms)",
 }
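With the serving mapping above, the header row of the rendered table becomes self-describing at the cost of width, which is the trade-off the removed comment was guarding against. For the visible uncommented keys it would read roughly as follows (any columns defined in the collapsed lines 25-42 are omitted here):

```
| Mean TTFT (ms) | Median TTFT (ms) | P99 TTFT (ms) | Mean ITL (ms) | Median ITL (ms) | P99 ITL (ms) |
```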
|
@@ -183,3 +182,11 @@ def results_to_json(latency, throughput, serving):
         serving_tests_markdown_table=serving_md_table,
         benchmarking_results_in_json_string=processed_results_json)
     f.write(results)
+
+    # document benchmarking results in json
+    with open(results_folder / "benchmark_results.json", "w") as f:
+
+        results = latency_results.to_dict(
+            orient='records') + throughput_results.to_dict(
+                orient='records') + serving_results.to_dict(orient='records')
+        f.write(json.dumps(results))
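The appended block concatenates the latency, throughput, and serving DataFrames into a single JSON array of flat per-test records. Below is a sketch of how a downstream consumer might read the artifact back; `results_folder` here is an assumption standing in for wherever the script wrote the file.

```python
# Sketch only: load the array written above. Each element is one test's
# record; serving-specific keys such as "median_ttft_ms" appear only in
# serving records, so key presence can be used to filter by test type.
import json
from pathlib import Path

results_folder = Path(".")  # assumption: the script's output folder

with open(results_folder / "benchmark_results.json") as f:
    records = json.load(f)

serving_records = [r for r in records if "median_ttft_ms" in r]
for record in serving_records:
    print(record["median_ttft_ms"])
```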