Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion 3rdparty/composable_kernel
Submodule composable_kernel updated 83 files
+34 −8 Jenkinsfile
+0 −3 example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py
+10 −21 example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py
+1 −1 example/ck_tile/03_gemm/README.md
+4 −4 example/ck_tile/03_gemm/gemm_utils.hpp
+1 −1 example/ck_tile/03_gemm/gemm_weight_preshuffle.cpp
+10 −2 example/ck_tile/03_gemm/run_gemm_example.inc
+1 −1 example/ck_tile/18_flatmm/run_flatmm_example.inc
+4 −2 example/ck_tile/37_transpose/transpose_policy.hpp
+168 −56 include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v3_mx_bpreshuffle.hpp
+1 −0 include/ck_tile/core.hpp
+6 −4 include/ck_tile/core/arch/amd_buffer_addressing.hpp
+6 −4 include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp
+30 −28 include/ck_tile/core/arch/amd_transpose_load_encoding.hpp
+25 −40 include/ck_tile/core/tensor/buffer_view.hpp
+206 −122 include/ck_tile/core/tensor/load_tile_transpose.hpp
+156 −0 include/ck_tile/core/utility/debug.hpp
+2 −2 include/ck_tile/ops/flatmm/block/flatmm_32x512x128_1x4x1_16x16x32.hpp
+2 −2 include/ck_tile/ops/flatmm/block/flatmm_sn_32x128x512_1x4x1_16x16x32.hpp
+3 −3 include/ck_tile/ops/fmha.hpp
+3 −3 include/ck_tile/ops/gemm.hpp
+40 −6 include/ck_tile/ops/gemm/block/block_gemm_asmem_bsmem_creg_v1_default_policy.hpp
+119 −35 include/ck_tile/ops/gemm/block/block_universal_gemm_as_bs_cr.hpp
+50 −17 include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_base.hpp
+13 −7 include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_comp_v3.hpp
+76 −65 include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_comp_v4.hpp
+15 −3 include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_comp_v4_default_policy.hpp
+51 −28 include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_mem.hpp
+2 −0 include/ck_tile/ops/gemm/pipeline/gemm_pipeline_agmem_bgmem_creg_v1.hpp
+3 −0 include/ck_tile/ops/gemm/pipeline/gemm_pipeline_problem.hpp
+97 −47 include/ck_tile/ops/gemm/pipeline/gemm_universal_pipeline_ag_bg_cr_policy.hpp
+1 −1 include/ck_tile/ops/gemm/pipeline/wp_pipeline_agmem_bgmem_creg_v1.hpp
+61 −23 include/ck_tile/ops/gemm/warp/warp_gemm.hpp
+154 −125 include/ck_tile/ops/gemm/warp/warp_gemm_attribute_mfma.hpp
+44 −17 include/ck_tile/ops/gemm/warp/warp_gemm_dispatcher.hpp
+13 −4 ...sor_operation_instance/gpu/gemm_mx/device_gemm_mx_xdl_f4_f4_f16/device_gemm_mx_xdl_f4_f4_f16_mk_mfma_mn.hpp
+5 −2 script/cmake-ck-dev.sh
+5 −2 script/cmake-ck-release.sh
+3 −0 test/CMakeLists.txt
+2 −0 test/ck_tile/CMakeLists.txt
+33 −0 test/ck_tile/batched_transpose/CMakeLists.txt
+25 −0 test/ck_tile/batched_transpose/batched_transpose.hpp
+283 −0 test/ck_tile/batched_transpose/batched_transpose.inc
+113 −0 test/ck_tile/batched_transpose/batched_transpose_api.cpp
+10 −0 test/ck_tile/batched_transpose/batched_transpose_bf16.cpp
+10 −0 test/ck_tile/batched_transpose/batched_transpose_fp16.cpp
+10 −0 test/ck_tile/batched_transpose/batched_transpose_fp8.cpp
+2 −2 test/ck_tile/gemm/test_gemm_pipeline_util.hpp
+28 −0 test/ck_tile/smoothquant/CMakeLists.txt
+21 −0 test/ck_tile/smoothquant/instances/smoothquant_bf16_n1024_instance.cpp
+12 −0 test/ck_tile/smoothquant/instances/smoothquant_bf16_n1536_instance.cpp
+13 −0 test/ck_tile/smoothquant/instances/smoothquant_bf16_n2048_instance.cpp
+11 −0 test/ck_tile/smoothquant/instances/smoothquant_bf16_n256_instance.cpp
+13 −0 test/ck_tile/smoothquant/instances/smoothquant_bf16_n3072_instance.cpp
+13 −0 test/ck_tile/smoothquant/instances/smoothquant_bf16_n4096_instance.cpp
+13 −0 test/ck_tile/smoothquant/instances/smoothquant_bf16_n4096_tp_instance.cpp
+12 −0 test/ck_tile/smoothquant/instances/smoothquant_bf16_n512_instance.cpp
+11 −0 test/ck_tile/smoothquant/instances/smoothquant_bf16_n64_n128_instance.cpp
+11 −0 test/ck_tile/smoothquant/instances/smoothquant_bf16_n768_instance.cpp
+21 −0 test/ck_tile/smoothquant/instances/smoothquant_fp16_n1024_instance.cpp
+12 −0 test/ck_tile/smoothquant/instances/smoothquant_fp16_n1536_instance.cpp
+13 −0 test/ck_tile/smoothquant/instances/smoothquant_fp16_n2048_instance.cpp
+11 −0 test/ck_tile/smoothquant/instances/smoothquant_fp16_n256_instance.cpp
+13 −0 test/ck_tile/smoothquant/instances/smoothquant_fp16_n3072_instance.cpp
+13 −0 test/ck_tile/smoothquant/instances/smoothquant_fp16_n4096_instance.cpp
+13 −0 test/ck_tile/smoothquant/instances/smoothquant_fp16_n4096_tp_instance.cpp
+12 −0 test/ck_tile/smoothquant/instances/smoothquant_fp16_n512_instance.cpp
+11 −0 test/ck_tile/smoothquant/instances/smoothquant_fp16_n64_n128_instance.cpp
+11 −0 test/ck_tile/smoothquant/instances/smoothquant_fp16_n768_instance.cpp
+143 −0 test/ck_tile/smoothquant/instances/smoothquant_fwd_api.cpp
+61 −0 test/ck_tile/smoothquant/instances/smoothquant_instance_common.hpp
+114 −0 test/ck_tile/smoothquant/smoothquant.hpp
+274 −0 test/ck_tile/smoothquant/smoothquant.inc
+11 −0 test/ck_tile/smoothquant/smoothquant_bf16.cpp
+11 −0 test/ck_tile/smoothquant/smoothquant_fp16.cpp
+35 −21 tile_engine/ops/gemm/CMakeLists.txt
+15 −11 tile_engine/ops/gemm/README.md
+0 −15 tile_engine/ops/gemm/configs/benchmark.json
+82 −0 tile_engine/ops/gemm/configs/custom_ci_config.json
+0 −15 tile_engine/ops/gemm/configs/default_config.json
+0 −15 tile_engine/ops/gemm/configs/user_provided_config.json
+34 −16 tile_engine/ops/gemm/gemm_instance_builder.py
+29 −19 tile_engine/ops/gemm/json_config.py