-
Notifications
You must be signed in to change notification settings - Fork 332
[Feature]: Add benchmark scripts for examples #1240
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f129b3b
de9c98a
12df0ec
725342d
ec019fc
29677d2
09912b5
17504a0
f2a4128
7fa4d18
6edc3a9
041aa95
4767697
31bfbe6
c16ebfe
2921c73
1c23908
730ae68
164672f
c7b3413
494ba47
326b8e5
7d75df2
58e7ffe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| import tilelang.tools.bench | ||
| import example_conv_analyze | ||
| import example_gemm_analyze | ||
|
|
||
|
|
||
| def bench_example_gemm_analyze(): | ||
| tilelang.tools.bench.process_func(example_gemm_analyze.main) | ||
|
|
||
|
|
||
| def bench_example_conv_analyze(): | ||
| tilelang.tools.bench.process_func(example_conv_analyze.main) | ||
|
|
||
|
|
||
| if globals().get("__name__") == "__main__": | ||
| tilelang.tools.bench.main() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| import tilelang.tools.bench | ||
| import example_gqa_sink_bwd_bhsd | ||
| import example_gqa_sink_fwd_bhsd_wgmma_pipelined | ||
| import example_mha_sink_bwd_bhsd | ||
| import example_mha_sink_fwd_bhsd | ||
| import example_mha_sink_fwd_bhsd_wgmma_pipelined | ||
|
|
||
|
|
||
| def bench_example_mha_sink_fwd_bhsd(): | ||
| tilelang.tools.bench.process_func(example_mha_sink_fwd_bhsd.main) | ||
|
|
||
|
|
||
| def bench_example_mha_sink_fwd_bhsd_sliding_window(): | ||
| tilelang.tools.bench.process_func(example_mha_sink_fwd_bhsd.main, window_size=128) | ||
|
|
||
|
|
||
| def bench_example_mha_sink_fwd_bhsd_wgmma_pipelined(): | ||
| tilelang.tools.bench.process_func(example_mha_sink_fwd_bhsd_wgmma_pipelined.main) | ||
|
|
||
|
|
||
| def bench_example_mha_sink_fwd_bhsd_wgmma_pipelined_sliding_window(): | ||
| tilelang.tools.bench.process_func( | ||
| example_mha_sink_fwd_bhsd_wgmma_pipelined.main, window_size=128) | ||
|
|
||
|
|
||
| def bench_example_gqa_sink_fwd_bhsd_wgmma_pipelined(): | ||
| tilelang.tools.bench.process_func(example_gqa_sink_fwd_bhsd_wgmma_pipelined.main) | ||
|
|
||
|
|
||
| def bench_example_gqa_sink_fwd_bhsd_wgmma_pipelined_sliding_window(): | ||
| tilelang.tools.bench.process_func( | ||
| example_gqa_sink_fwd_bhsd_wgmma_pipelined.main, window_size=128) | ||
|
|
||
|
|
||
| def bench_example_mha_sink_bwd_bhsd(): | ||
| tilelang.tools.bench.process_func(example_mha_sink_bwd_bhsd.main) | ||
|
|
||
|
|
||
| def bench_example_mha_sink_bwd_bhsd_sliding_window(): | ||
| tilelang.tools.bench.process_func(example_mha_sink_bwd_bhsd.main, window_size=128) | ||
|
|
||
|
|
||
| def bench_example_gqa_sink_bwd_bhsd(): | ||
| tilelang.tools.bench.process_func(example_gqa_sink_bwd_bhsd.main) | ||
|
|
||
|
|
||
| def bench_example_gqa_sink_bwd_bhsd_sliding_window(): | ||
| tilelang.tools.bench.process_func(example_gqa_sink_bwd_bhsd.main, window_size=128) | ||
|
|
||
|
|
||
| if globals().get("__name__") == "__main__": | ||
| tilelang.tools.bench.main() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| import tilelang.tools.bench | ||
| import block_sparse_attn_triton | ||
| import example_tilelang_block_sparse_attn | ||
| import example_tilelang_sparse_gqa_decode_varlen_indice | ||
| import example_tilelang_sparse_gqa_decode_varlen_mask | ||
| import example_triton_sparse_gqa_decode_varlen_indice | ||
| import example_triton_sparse_gqa_decode_varlen_mask | ||
|
|
||
|
|
||
| def bench_block_sparse_attn_triton(): | ||
| tilelang.tools.bench.process_func(block_sparse_attn_triton.main) | ||
|
|
||
|
|
||
| def bench_example_tilelang_block_sparse_attn(): | ||
| tilelang.tools.bench.process_func(example_tilelang_block_sparse_attn.main) | ||
|
|
||
|
|
||
| def bench_example_tilelang_sparse_gqa_decode_varlen_indice(): | ||
| tilelang.tools.bench.process_func( | ||
| example_tilelang_sparse_gqa_decode_varlen_indice.main, batch=1, max_cache_seqlen=2048) | ||
|
|
||
|
|
||
| def bench_example_tilelang_sparse_gqa_decode_varlen_mask(): | ||
| tilelang.tools.bench.process_func( | ||
| example_tilelang_sparse_gqa_decode_varlen_mask.main, batch=1, max_cache_seqlen=2048) | ||
|
|
||
|
|
||
| def bench_example_triton_sparse_gqa_decode_varlen_indice(): | ||
| tilelang.tools.bench.process_func( | ||
| example_triton_sparse_gqa_decode_varlen_indice.main, | ||
| batch=8, | ||
| heads=8, | ||
| heads_kv=4, | ||
| max_cache_seqlen=2048, | ||
| dim=128, | ||
| dim_v=128, | ||
| sparse_ratio=0.8, | ||
| block_size=32) | ||
|
|
||
|
|
||
| def bench_example_triton_sparse_gqa_decode_varlen_mask(): | ||
| tilelang.tools.bench.process_func( | ||
| example_triton_sparse_gqa_decode_varlen_mask.main, | ||
| batch=8, | ||
| heads=8, | ||
| heads_kv=4, | ||
| max_cache_seqlen=2048, | ||
| dim=128, | ||
| dim_v=128, | ||
| sparse_ratio=0.8, | ||
| block_size=32) | ||
|
|
||
|
|
||
| if globals().get("__name__") == "__main__": | ||
| tilelang.tools.bench.main() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| import tilelang.tools.bench | ||
| import example_blocksparse_gemm | ||
|
|
||
|
|
||
| def bench_example_blocksparse_gemm(): | ||
| tilelang.tools.bench.process_func(example_blocksparse_gemm.main) | ||
|
|
||
|
|
||
| if globals().get("__name__") == "__main__": | ||
| tilelang.tools.bench.main() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| import tilelang.tools.bench | ||
| import example_group_per_split_token_cast_to_fp8 | ||
| import example_per_token_cast_to_fp8 | ||
|
|
||
|
|
||
| def bench_example_group_per_split_token_cast_to_fp8(): | ||
| tilelang.tools.bench.process_func( | ||
| example_group_per_split_token_cast_to_fp8.main, | ||
| M=1024, | ||
| N=1024, | ||
| BG=2, | ||
| blk_m=4, | ||
| batch_sizes=[128, 896]) | ||
|
|
||
|
|
||
| def bench_example_per_token_cast_to_fp8(): | ||
| tilelang.tools.bench.process_func(example_per_token_cast_to_fp8.main, M=2048, N=512, blk_m=8) | ||
|
|
||
|
|
||
| if globals().get("__name__") == "__main__": | ||
| tilelang.tools.bench.main() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| import tilelang.tools.bench | ||
| import example_convolution | ||
| import example_convolution_autotune | ||
|
|
||
|
|
||
| def bench_example_convolution(): | ||
| tilelang.tools.bench.process_func(example_convolution.main) | ||
|
|
||
|
|
||
| def bench_example_convolution_autotune(): | ||
| tilelang.tools.bench.process_func(example_convolution_autotune.main) | ||
|
|
||
|
|
||
| if globals().get("__name__") == "__main__": | ||
| tilelang.tools.bench.main() |
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,10 @@ | ||||||||||||
| import tilelang.tools.bench | ||||||||||||
| import example_deepgemm_fp8_2xAcc | ||||||||||||
|
Comment on lines
+1
to
+2
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Missing import for decorator module. Lines 5-6 use decorators from `tilelang.testing`, but that module is not imported. Add the missing import:
import tilelang.tools.bench
+import tilelang.testing
import example_deepgemm_fp8_2xAcc
📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| def bench_example_deepgemm_fp8_2xAcc(): | ||||||||||||
| tilelang.tools.bench.process_func(example_deepgemm_fp8_2xAcc.main) | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| if globals().get("__name__") == "__main__": | ||||||||||||
| tilelang.tools.bench.main() | ||||||||||||
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,10 @@ | ||||||||||||
| import tilelang.tools.bench | ||||||||||||
| import example_mla_decode | ||||||||||||
|
Comment on lines
+1
to
+2
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Critical: Missing import for decorators. The decorators on lines 5-6 reference `tilelang.testing`, which is not imported. Apply this diff to add the missing import:
import tilelang.tools.bench
+import tilelang.testing
import example_mla_decode
📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| def bench_example_mla_decode(): | ||||||||||||
| tilelang.tools.bench.process_func(example_mla_decode.main) | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| if globals().get("__name__") == "__main__": | ||||||||||||
| tilelang.tools.bench.main() | ||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| import tilelang.tools.bench | ||
| import example_tilelang_nsa_fwd | ||
| import example_tilelang_nsa_decode | ||
|
Comment on lines
+2
to
+3
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
Verify that the example modules are importable. The relative imports assume the example modules are in the same directory or on PYTHONPATH. Ensure that these modules exist and can be imported when the script is run. Run the following script to verify the modules exist:
🏁 Script executed:
#!/bin/bash
# Description: Verify the example modules exist in the expected location
# Check if the example modules exist
fd -t f "example_tilelang_nsa_fwd.py" examples/deepseek_nsa/
fd -t f "example_tilelang_nsa_decode.py" examples/deepseek_nsa/
Length of output: 102
Length of output: 400
Length of output: 137
Length of output: 2643
Length of output: 464
Fix bare imports to use relative syntax or add sys.path setup. Lines 2-3 use bare absolute imports that will fail at runtime unless the script's own directory is on `sys.path`. Use relative imports instead:
from . import example_tilelang_nsa_fwd
from . import example_tilelang_nsa_decode
Alternatively, add to the top of the file:
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
The test file (
🤖 Prompt for AI Agents |
||
|
|
||
|
|
||
| def bench_example_tilelang_nsa_fwd(): | ||
| tilelang.tools.bench.process_func(example_tilelang_nsa_fwd.main) | ||
|
|
||
|
|
||
| def bench_example_tilelang_nsa_fwd_decode(): | ||
| tilelang.tools.bench.process_func(example_tilelang_nsa_decode.main) | ||
|
|
||
|
|
||
| if globals().get("__name__") == "__main__": | ||
| tilelang.tools.bench.main() | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,64 @@ | ||||||||||||||||||||||||||||
| import tilelang.tools.bench | ||||||||||||||||||||||||||||
| import fp8_lighting_indexer | ||||||||||||||||||||||||||||
| import sparse_mla_bwd | ||||||||||||||||||||||||||||
| import sparse_mla_fwd | ||||||||||||||||||||||||||||
| import sparse_mla_fwd_pipelined | ||||||||||||||||||||||||||||
| import topk_selector | ||||||||||||||||||||||||||||
|
Comment on lines
+1
to
+6
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Missing import for decorator module. Lines 17-18, 23-24, and 29-30 use decorators from `tilelang.testing`, but that module is not imported. Add the missing import:
import tilelang.tools.bench
+import tilelang.testing
import fp8_lighting_indexer
import sparse_mla_bwd
import sparse_mla_fwd
import sparse_mla_fwd_pipelined
import topk_selector
📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| def bench_topk_selector(): | ||||||||||||||||||||||||||||
| tilelang.tools.bench.process_func(topk_selector.test_topk_selector) | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| def bench_fp8_lighting_indexer(): | ||||||||||||||||||||||||||||
| tilelang.tools.bench.process_func( | ||||||||||||||||||||||||||||
| fp8_lighting_indexer.test_fp8_lighting_indexer, | ||||||||||||||||||||||||||||
| S=512, | ||||||||||||||||||||||||||||
| SKV=1024, | ||||||||||||||||||||||||||||
| H=32, | ||||||||||||||||||||||||||||
| HKV=1, | ||||||||||||||||||||||||||||
| D=64, | ||||||||||||||||||||||||||||
| kv_stride=1) | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| def bench_sparse_mla_fwd(): | ||||||||||||||||||||||||||||
| tilelang.tools.bench.process_func( | ||||||||||||||||||||||||||||
| sparse_mla_fwd.test_sparse_mla_fwd, | ||||||||||||||||||||||||||||
| S=256, | ||||||||||||||||||||||||||||
| SKV=1024, | ||||||||||||||||||||||||||||
| H=64, | ||||||||||||||||||||||||||||
| HKV=1, | ||||||||||||||||||||||||||||
| DQK=576, | ||||||||||||||||||||||||||||
| DV=512, | ||||||||||||||||||||||||||||
| topk=256, | ||||||||||||||||||||||||||||
| check_correctness=False) | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| def bench_sparse_mla_fwd_pipelined(): | ||||||||||||||||||||||||||||
| tilelang.tools.bench.process_func( | ||||||||||||||||||||||||||||
| sparse_mla_fwd_pipelined.test_sparse_mla_fwd_pipelined, | ||||||||||||||||||||||||||||
| S=256, | ||||||||||||||||||||||||||||
| SKV=512, | ||||||||||||||||||||||||||||
| H=64, | ||||||||||||||||||||||||||||
| HKV=1, | ||||||||||||||||||||||||||||
| DQK=576, | ||||||||||||||||||||||||||||
| DV=512, | ||||||||||||||||||||||||||||
| topk=256, | ||||||||||||||||||||||||||||
| check_correctness=False) | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| def bench_sparse_mla_bwd(): | ||||||||||||||||||||||||||||
| tilelang.tools.bench.process_func( | ||||||||||||||||||||||||||||
| sparse_mla_bwd.test_sparse_mla_bwd, | ||||||||||||||||||||||||||||
| S=256, | ||||||||||||||||||||||||||||
| SKV=512, | ||||||||||||||||||||||||||||
| H=64, | ||||||||||||||||||||||||||||
| HKV=1, | ||||||||||||||||||||||||||||
| DQKV=576, | ||||||||||||||||||||||||||||
| DV=512, | ||||||||||||||||||||||||||||
| topk=256, | ||||||||||||||||||||||||||||
| check_correctness=False) | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| if globals().get("__name__") == "__main__": | ||||||||||||||||||||||||||||
| tilelang.tools.bench.main() | ||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| import tilelang.tools.bench | ||
| import example_dequant_gemm_bf16_mxfp4_hopper | ||
| import example_dequant_gemm_bf16_mxfp4_hopper_tma | ||
| import example_dequant_gemm_fp4_hopper | ||
| import example_dequant_gemm_w4a8 | ||
| import example_dequant_gemv_fp16xint4 | ||
| import example_dequant_groupedgemm_bf16_mxfp4_hopper | ||
coderabbitai[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
| def bench_example_dequant_gemv_fp16xint4(): | ||
| tilelang.tools.bench.process_func(example_dequant_gemv_fp16xint4.main) | ||
|
|
||
|
|
||
| def bench_example_dequant_gemm_fp4_hopper(): | ||
| tilelang.tools.bench.process_func(example_dequant_gemm_fp4_hopper.main) | ||
|
|
||
|
|
||
| def bench_example_dequant_gemm_bf16_mxfp4_hopper(): | ||
| tilelang.tools.bench.process_func(example_dequant_gemm_bf16_mxfp4_hopper.main) | ||
|
|
||
|
|
||
| def bench_example_dequant_gemm_bf16_mxfp4_hopper_tma(): | ||
| tilelang.tools.bench.process_func(example_dequant_gemm_bf16_mxfp4_hopper_tma.main) | ||
|
|
||
|
|
||
| def bench_example_dequant_groupedgemm_bf16_mxfp4_hopper(): | ||
| tilelang.tools.bench.process_func(example_dequant_groupedgemm_bf16_mxfp4_hopper.main) | ||
|
|
||
|
|
||
| def bench_example_dequant_gemm_w4a8(): | ||
| tilelang.tools.bench.process_func(example_dequant_gemm_w4a8.main) | ||
|
|
||
|
|
||
| if globals().get("__name__") == "__main__": | ||
| tilelang.tools.bench.main() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
Image upload step will not produce a valid embeddable URL
There are a couple of correctness issues in the new PNG upload/comment flow that will break the embedded image:
1. Filename mismatch between tree entry and URL (definite bug).
   - The tree entry is created as `perf_plot_${context.runId}.png` (Line 101).
   - The URL is built with `bench_${context.runId}.png` (Line 108).
   The URL points to a file that does not exist in the created tree, so the image link in the PR comment will 404.
2. Insufficient token permissions for git APIs.
   - The workflow grants `contents: read` only, but `git.createBlob` / `git.createTree` need write-level repo contents permissions. As written, the upload step will likely fail with a 403.
3. `ci_performance.py` is executed from `main`, not from the PR merge commit.
   - The workflow checks out `refs/pull/.../merge`, then installs the merged version, but later runs `git checkout main` and never switches back before running `python maint/scripts/ci_performance.py`. That means the script version used at runtime is the one from `main`, not the PR. The new bench.md/bench.png generation introduced in this PR won’t be exercised for open-PR `/perf` runs.
Suggested fixes
- Ensure `ci_performance.py` runs from the PR merge commit so the new bench.md/bench.png logic is used. For instance:
  - run `git checkout main` earlier and then re-checkout `refs/pull/.../merge` before the “Run performance test” step, or
  - use `./tll/bin/python` and import it from the installed package instead of the working tree, or
  - run `git checkout refs/pull/${{ github.event.issue.number }}/merge` before step “Run performance test”.
Also applies to: 81-109, 116-128
🏁 Script executed:
Length of output: 5204
🏁 Script executed:
Length of output: 1189
🏁 Script executed:
Length of output: 273
Image upload step will not produce a valid embeddable URL and will fail with permissions error
All three correctness issues in the new PNG upload/comment flow are confirmed and must be fixed:
1. Filename mismatch between tree entry and URL (confirmed bug).
   - `perf_plot_${context.runId}.png`
   - `bench_${context.runId}.png`
2. Insufficient token permissions for git APIs (confirmed bug).
   - `contents: read` only
   - `github.rest.git.createBlob` and `github.rest.git.createTree` require write-level permissions
3. `ci_performance.py` executes from `main`, not the PR merge commit (confirmed bug).
   - `git checkout main` switches to main branch
   - … `tl` venv … `/perf` runs.
Required fixes:
1. Make the tree path and URL consistent (both should use the `bench_` prefix).
2. Upgrade workflow permissions to allow blob/tree creation.
3. Ensure `ci_performance.py` runs from the PR merge commit: after line 68, add `git checkout refs/pull/${{ github.event.issue.number }}/merge` before the "Run performance test" step.
🤖 Prompt for AI Agents