@@ -15,10 +15,10 @@
     "avg_latency": "Mean latency (ms)",
     # "P10": "P10 (s)",
     # "P25": "P25 (s)",
-    "P50": "Median",
+    "P50": "Median latency (ms)",
     # "P75": "P75 (s)",
     # "P90": "P90 (s)",
-    "P99": "P99",
+    "P99": "P99 latency (ms)",
 }

 # throughput tests and the keys that will be printed into markdown
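These mapping dicts control which DataFrame columns get published and what their markdown headers say; the change above spells out the metric and unit in every header instead of the bare "Median"/"P99". Below is a minimal sketch of the presumable usage pattern (select the mapped columns, rename, render). `latency_column_mapping` and `to_markdown_table` are hypothetical names; `latency_results` is the DataFrame this diff later serializes.

```python
# Sketch only -- not the script's actual code.
import pandas as pd

# Hypothetical name for the dict edited above (uncommented keys only).
latency_column_mapping = {
    "avg_latency": "Mean latency (ms)",
    "P50": "Median latency (ms)",
    "P99": "P99 latency (ms)",
}

def to_markdown_table(df: pd.DataFrame, mapping: dict) -> str:
    # Keep only the mapped columns that exist, then swap in the
    # human-readable headers before rendering (requires `tabulate`).
    cols = [c for c in mapping if c in df.columns]
    return df[cols].rename(columns=mapping).to_markdown(index=False)
```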
|
@@ -43,15 +43,14 @@
     # "input_throughput": "Input Tput (tok/s)",
     # "output_throughput": "Output Tput (tok/s)",
     "mean_ttft_ms": "Mean TTFT (ms)",
-    # do not say TTFT again to avoid the table getting too wide
-    "median_ttft_ms": "Median",
-    "p99_ttft_ms": "P99",
+    "median_ttft_ms": "Median TTFT (ms)",
+    "p99_ttft_ms": "P99 TTFT (ms)",
     # "mean_tpot_ms": "Mean TPOT (ms)",
     # "median_tpot_ms": "Median",
     # "p99_tpot_ms": "P99",
     "mean_itl_ms": "Mean ITL (ms)",
-    "median_itl_ms": "Median",
-    "p99_itl_ms": "P99",
+    "median_itl_ms": "Median ITL (ms)",
+    "p99_itl_ms": "P99 ITL (ms)",
 }
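With the serving mapping above, the header row of the rendered table becomes self-describing at the cost of width, which is the trade-off the removed comment was guarding against. For the visible uncommented keys it would read roughly as follows (any columns defined in the collapsed lines 25-42 are omitted here):

```
| Mean TTFT (ms) | Median TTFT (ms) | P99 TTFT (ms) | Mean ITL (ms) | Median ITL (ms) | P99 ITL (ms) |
```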
|
@@ -183,3 +182,11 @@ def results_to_json(latency, throughput, serving):
         serving_tests_markdown_table=serving_md_table,
         benchmarking_results_in_json_string=processed_results_json)
     f.write(results)
+
+    # document benchmarking results in json
+    with open(results_folder / "benchmark_results.json", "w") as f:
+
+        results = latency_results.to_dict(
+            orient='records') + throughput_results.to_dict(
+                orient='records') + serving_results.to_dict(orient='records')
+        f.write(json.dumps(results))
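The appended block concatenates the latency, throughput, and serving DataFrames into a single JSON array of flat per-test records. Below is a sketch of how a downstream consumer might read the artifact back; `results_folder` here is an assumption standing in for wherever the script wrote the file.

```python
# Sketch only: load the array written above. Each element is one test's
# record; serving-specific keys such as "median_ttft_ms" appear only in
# serving records, so key presence can be used to filter by test type.
import json
from pathlib import Path

results_folder = Path(".")  # assumption: the script's output folder

with open(results_folder / "benchmark_results.json") as f:
    records = json.load(f)

serving_records = [r for r in records if "median_ttft_ms" in r]
for record in serving_records:
    print(record["median_ttft_ms"])
```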