Skip to content

Commit 363b7e4

Browse files
committed
feat: generate efficiency plot
Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
1 parent 9bd7d0c commit 363b7e4

File tree

1 file changed

+102
-0
lines changed

1 file changed

+102
-0
lines changed

benchmarks/utils/plot.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,104 @@ def create_plot(
142142
print(f"Saved plot: {output_path}")
143143

144144

145+
def create_efficiency_plot(
    deployment_results: Dict,
    plots_dir: Path,
    output_tokens: int = 200,
    num_gpus: int = 1,
) -> None:
    """
    Create an efficiency plot showing tok/s/gpu vs tok/s/user with concurrency as labeled points.

    Args:
        deployment_results: Dict of deployment_type -> results, where results is an
            iterable of (concurrency, metrics) pairs and each metrics dict is expected
            to contain metrics["request_throughput"]["avg"] in requests/sec.
            NOTE(review): schema inferred from the access pattern here — confirm
            against the benchmark-result producer.
        plots_dir: Directory to save plots
        output_tokens: Average output tokens per request (default 200)
        num_gpus: Number of GPUs serving each deployment (default 1). Generalizes the
            previously hard-coded single-GPU assumption while preserving the old
            default behavior.
    """
    plt.figure(figsize=(12, 8))

    # Fixed color/marker per known deployment type; unknown types fall back
    # to gray circles via .get() below.
    colors = {"agg": "#1f77b4", "disagg": "#ff7f0e", "vanilla": "#2ca02c"}
    markers = {"agg": "o", "disagg": "s", "vanilla": "^"}

    for deployment_type, results in deployment_results.items():
        tok_s_per_user = []
        tok_s_per_gpu = []
        concurrency_levels = []

        for concurrency, metrics in results:
            # Guard: a zero (or negative) concurrency would make the per-user
            # division meaningless and raise ZeroDivisionError.
            if concurrency <= 0:
                print(
                    f"Warning: Skipping non-positive concurrency {concurrency} for {deployment_type}"
                )
                continue

            try:
                # Get request throughput (requests/sec); keep the try body to
                # the single lookup that can actually raise KeyError.
                request_throughput = metrics["request_throughput"]["avg"]
            except KeyError as e:
                # Tolerate partial results: skip this point, keep the rest.
                print(
                    f"Warning: Missing metric for {deployment_type} concurrency {concurrency}: {e}"
                )
                continue

            # Approximate total generated tokens/sec from request throughput.
            total_tok_s = request_throughput * output_tokens

            # Per-user spreads the total across concurrent users; per-GPU
            # spreads it across the serving GPUs.
            tok_s_per_user.append(total_tok_s / concurrency)
            tok_s_per_gpu.append(total_tok_s / num_gpus)
            concurrency_levels.append(concurrency)

        if tok_s_per_user and tok_s_per_gpu:
            # Plot this deployment's points in its assigned color/marker.
            color = colors.get(deployment_type, "#888888")
            marker = markers.get(deployment_type, "o")

            plt.scatter(
                tok_s_per_user,
                tok_s_per_gpu,
                c=color,
                marker=marker,
                s=120,
                alpha=0.8,
                label=deployment_type.title(),
                edgecolors="black",
                linewidth=1.5,
            )

            # Annotate each point with its concurrency level.
            for x, y, c in zip(tok_s_per_user, tok_s_per_gpu, concurrency_levels):
                plt.annotate(
                    f"{c}",
                    (x, y),
                    xytext=(8, 8),
                    textcoords="offset points",
                    fontsize=10,
                    fontweight="bold",
                    ha="left",
                )

    plt.title("GPU Efficiency vs User Experience", fontsize=14, fontweight="bold")
    plt.xlabel("Tokens/sec per User", fontsize=12)
    plt.ylabel("Tokens/sec per GPU", fontsize=12)
    plt.grid(True, alpha=0.3)

    # Explain the point labels for readers of the saved image.
    plt.figtext(
        0.02,
        0.02,
        "Note: Numbers on dots indicate concurrency level",
        fontsize=10,
        style="italic",
        alpha=0.7,
    )

    plt.legend()

    plt.tight_layout()
    output_path = plots_dir / "efficiency_tok_s_gpu_vs_user.png"
    plt.savefig(output_path, dpi=300, bbox_inches="tight")
    plt.close()
    print(f"Saved efficiency plot: {output_path}")
241+
242+
145243
def generate_plots(base_output_dir: Path) -> None:
146244
"""
147245
Generate performance plots from benchmark results.
@@ -248,6 +346,9 @@ def generate_plots(base_output_dir: Path) -> None:
248346
log_scale_x=True,
249347
)
250348

349+
# 5. Efficiency plot: tok/s/gpu vs tok/s/user
350+
create_efficiency_plot(deployment_results, plots_dir)
351+
251352
# Generate summary
252353
summary_lines = [
253354
"Benchmark Results Summary",
@@ -273,6 +374,7 @@ def generate_plots(base_output_dir: Path) -> None:
273374
" - avg_inter_token_latency_vs_concurrency.png",
274375
" - request_throughput_vs_concurrency.png",
275376
" - avg_time_to_first_token_vs_concurrency.png",
377+
" - efficiency_tok_s_gpu_vs_user.png",
276378
]
277379
)
278380

0 commit comments

Comments
 (0)