============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_256x128_64x3_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=256 --cta_n=128 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=3 --warps_m=4 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.199608 ms Memory: 316.782 GiB/s Math: 290621 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_128x256_64x3_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=128 --cta_n=256 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=3 --warps_m=2 --warps_n=4 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.225044 ms Memory: 280.977 GiB/s Math: 257773 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_256x64_64x4_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=256 --cta_n=64 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=4 --warps_n=1 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.201697 ms Memory: 313.502 GiB/s Math: 287611 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_64x256_64x4_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=64 --cta_n=256 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=1 --warps_n=4 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.215194 ms Memory: 293.84 GiB/s Math: 269573 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_256x32_64x4_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=256 --cta_n=32 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=4 --warps_n=1 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.272732 ms Memory: 231.848 GiB/s Math: 212701 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_128x128_64x5_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=128 --cta_n=128 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=5 --warps_m=2 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.180623 ms Memory: 350.079 GiB/s Math: 321168 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_64x128_64x6_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=64 --cta_n=128 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=6 --warps_m=2 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.199721 ms Memory: 316.604 GiB/s Math: 290457 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_128x32_64x6_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=128 --cta_n=32 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=6 --warps_m=4 --warps_n=1 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.269752 ms Memory: 234.409 GiB/s Math: 215051 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_256x128_128x3_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=256 --cta_n=128 --cta_k=128 --cluster_m=1 \ --cluster_n=1 --cluster_k=1 --stages=3 --warps_m=4 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 \ --min_cc=80 --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.183296 ms Memory: 344.974 GiB/s Math: 316485 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_128x256_128x3_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=128 --cta_n=256 --cta_k=128 --cluster_m=1 \ --cluster_n=1 --cluster_k=1 --stages=3 --warps_m=2 --warps_n=4 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 \ --min_cc=80 --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.217446 ms Memory: 290.795 GiB/s Math: 266780 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_256x64_128x4_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=256 --cta_n=64 --cta_k=128 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=4 --warps_n=1 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.205097 ms Memory: 308.305 GiB/s Math: 282844 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_64x256_128x4_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=64 --cta_n=256 --cta_k=128 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=1 --warps_n=4 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.232899 ms Memory: 271.502 GiB/s Math: 249080 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_256x32_128x4_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=256 --cta_n=32 --cta_k=128 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=4 --warps_n=1 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.248207 ms Memory: 254.756 GiB/s Math: 233717 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_32x256_128x4_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=32 --cta_n=256 --cta_k=128 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=1 --warps_n=4 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.269793 ms Memory: 234.374 GiB/s Math: 215018 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_128x128_128x4_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=128 --cta_n=128 --cta_k=128 --cluster_m=1 \ --cluster_n=1 --cluster_k=1 --stages=4 --warps_m=2 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 \ --min_cc=80 --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.1997 ms Memory: 316.636 GiB/s Math: 290487 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_64x128_128x3_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=64 --cta_n=128 --cta_k=128 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=3 --warps_m=2 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.1839 ms Memory: 343.841 GiB/s Math: 315445 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_i16832gemm_s8_s4_128x32_128x4_tn_align32 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s32:column --D=s32:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=128 --cta_n=32 --cta_k=128 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=4 --warps_n=1 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 67895296 bytes FLOPs: 58010370048 flops FLOPs/Byte: 854 Runtime: 0.26324 ms Memory: 240.209 GiB/s Math: 220371 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_256x128_64x3_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=256 --cta_n=128 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=3 --warps_m=4 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.191201 ms Memory: 123.857 GiB/s Math: 303399 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_128x256_64x3_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=128 --cta_n=256 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=3 --warps_m=2 --warps_n=4 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.231209 ms Memory: 102.425 GiB/s Math: 250900 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_256x64_64x4_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=256 --cta_n=64 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=4 --warps_n=1 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.207749 ms Memory: 113.992 GiB/s Math: 279233 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_64x256_64x4_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=64 --cta_n=256 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=1 --warps_n=4 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.219955 ms Memory: 107.666 GiB/s Math: 263737 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_256x32_64x4_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=256 --cta_n=32 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=4 --warps_n=1 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.286484 ms Memory: 82.6629 GiB/s Math: 202490 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_128x128_64x5_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=128 --cta_n=128 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=5 --warps_m=2 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.171489 ms Memory: 138.094 GiB/s Math: 338274 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_64x128_64x6_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=64 --cta_n=128 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=6 --warps_m=2 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.194468 ms Memory: 121.777 GiB/s Math: 298303 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_128x32_64x6_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=128 --cta_n=32 --cta_k=64 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=6 --warps_m=4 --warps_n=1 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.278948 ms Memory: 84.8963 GiB/s Math: 207961 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_256x128_128x3_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=256 --cta_n=128 --cta_k=128 --cluster_m=1 \ --cluster_n=1 --cluster_k=1 --stages=3 --warps_m=4 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 \ --min_cc=80 --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.166451 ms Memory: 142.274 GiB/s Math: 348513 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_128x256_128x3_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=128 --cta_n=256 --cta_k=128 --cluster_m=1 \ --cluster_n=1 --cluster_k=1 --stages=3 --warps_m=2 --warps_n=4 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 \ --min_cc=80 --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.215173 ms Memory: 110.059 GiB/s Math: 269599 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_256x64_128x4_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=256 --cta_n=64 --cta_k=128 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=4 --warps_n=1 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.198574 ms Memory: 119.258 GiB/s Math: 292135 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_64x256_128x4_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=64 --cta_n=256 --cta_k=128 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=1 --warps_n=4 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.237681 ms Memory: 99.6364 GiB/s Math: 244069 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_256x32_128x4_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=256 --cta_n=32 --cta_k=128 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=4 --warps_n=1 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.258499 ms Memory: 91.6123 GiB/s Math: 224413 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_32x256_128x4_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=32 --cta_n=256 --cta_k=128 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=1 --warps_n=4 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.274616 ms Memory: 86.2354 GiB/s Math: 211242 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_128x128_128x4_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=128 --cta_n=128 --cta_k=128 --cluster_m=1 \ --cluster_n=1 --cluster_k=1 --stages=4 --warps_m=2 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 \ --min_cc=80 --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.194836 ms Memory: 121.546 GiB/s Math: 297739 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_64x128_128x3_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=64 --cta_n=128 --cta_k=128 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=3 --warps_m=2 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.181146 ms Memory: 130.733 GiB/s Math: 320242 GFLOP/s ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s8_i16832gemm_s8_s4_128x32_128x4_tn_align32 Status: Success Verification: ON Disposition: Incorrect reference_device: Incorrect cuBLAS: Not run cuDNN: Not run Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=2048 --A=s8:row --B=s4:column --C=s8:column --D=s8:column \ --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 --batch_count=1 --raster_order=heuristic \ --swizzle_size=1 --op_class=tensorop --accum=s32 --cta_m=128 --cta_n=32 --cta_k=128 --cluster_m=1 --cluster_n=1 \ --cluster_k=1 --stages=4 --warps_m=4 --warps_n=1 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=32 --min_cc=80 \ --max_cc=1024 Bytes: 25427968 bytes FLOPs: 58010370048 flops FLOPs/Byte: 2281 Runtime: 0.270008 ms Memory: 87.7071 GiB/s Math: 214847 GFLOP/s ============================= CSV Results: Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,C,D,alpha,beta,split_k_mode,split_k_slices,batch_count,raster_order,swizzle_size,op_class,accum,cta_m,cta_n,cta_k,cluster_m,cluster_n,cluster_k,stages,warps_m,warps_n,warps_k,inst_m,inst_n,inst_k,min_cc,max_cc,Bytes,Flops,Flops/Byte,Runtime,GB/s,GFLOPs 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_256x128_64x3_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,256,128,64,1,1,1,3,4,2,1,16,8,32,80,1024,67895296,58010370048,854,0.199608,316.782,290621 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_128x256_64x3_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,128,256,64,1,1,1,3,2,4,1,16,8,32,80,1024,67895296,58010370048,854,0.225044,280.977,257773 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_256x64_64x4_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,256,64,64,1,1,1,4,4,1,1,16,8,32,80,1024,67895296,58010370048,854,0.201697,313.502,287611 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_64x256_64x4_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,64,256,64,1,1,1,4,1,4,1,16,8,32,80,1024,67895296,58010370048,854,0.215194,293.84,269573 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_256x32_64x4_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,256,32,64,1,1,1,4,4,1,1,16,8,32,80,1024,67895296,58010370048,854,0.272732,231.848,212701 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_128x128_64x5_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,128,128,64,1,1,1,5,2,2,1,16,8,32,80,1024,67895296,58010370048,854,0.180623,350.079,321168 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_64x128_64x6_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,64,128,64,1,1,1,6,2,2,1,16,8,32,80,1024,67895296,58010370048,854,0.199721,316.604,290457 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_128x32_64x6_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,128,32,64,1,1,1,6,4,1,1,16,8,32,80,1024,67895296,58010370048,854,0.269752,234.409,215051 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_256x128_128x3_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,256,128,128,1,1,1,3,4,2,1,16,8,32,80,1024,67895296,58010370048,854,0.183296,344.974,316485 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_128x256_128x3_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,128,256,128,1,1,1,3,2,4,1,16,8,32,80,1024,67895296,58010370048,854,0.217446,290.795,266780 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_256x64_128x4_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,256,64,128,1,1,1,4,4,1,1,16,8,32,80,1024,67895296,58010370048,854,0.205097,308.305,282844 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_64x256_128x4_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,64,256,128,1,1,1,4,1,4,1,16,8,32,80,1024,67895296,58010370048,854,0.232899,271.502,249080 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_256x32_128x4_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,256,32,128,1,1,1,4,4,1,1,16,8,32,80,1024,67895296,58010370048,854,0.248207,254.756,233717 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_32x256_128x4_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,32,256,128,1,1,1,4,1,4,1,16,8,32,80,1024,67895296,58010370048,854,0.269793,234.374,215018 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_128x128_128x4_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,128,128,128,1,1,1,4,2,2,1,16,8,32,80,1024,67895296,58010370048,854,0.1997,316.636,290487 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_64x128_128x3_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,64,128,128,1,1,1,3,2,2,1,16,8,32,80,1024,67895296,58010370048,854,0.1839,343.841,315445 1,CUTLASS,gemm,cutlass_tensorop_i16832gemm_s8_s4_128x32_128x4_tn_align32,passed,success,universal,3456,4096,2048,s8:row,s4:column,s32:column,s32:column,1,0,serial,1,1,heuristic,1,tensorop,s32,128,32,128,1,1,1,4,4,1,1,16,8,32,80,1024,67895296,58010370048,854,0.26324,240.209,220371 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_256x128_64x3_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,256,128,64,1,1,1,3,4,2,1,16,8,32,80,1024,25427968,58010370048,2281,0.191201,123.857,303399 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_128x256_64x3_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,128,256,64,1,1,1,3,2,4,1,16,8,32,80,1024,25427968,58010370048,2281,0.231209,102.425,250900 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_256x64_64x4_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,256,64,64,1,1,1,4,4,1,1,16,8,32,80,1024,25427968,58010370048,2281,0.207749,113.992,279233 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_64x256_64x4_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,64,256,64,1,1,1,4,1,4,1,16,8,32,80,1024,25427968,58010370048,2281,0.219955,107.666,263737 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_256x32_64x4_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,256,32,64,1,1,1,4,4,1,1,16,8,32,80,1024,25427968,58010370048,2281,0.286484,82.6629,202490 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_128x128_64x5_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,128,128,64,1,1,1,5,2,2,1,16,8,32,80,1024,25427968,58010370048,2281,0.171489,138.094,338274 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_64x128_64x6_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,64,128,64,1,1,1,6,2,2,1,16,8,32,80,1024,25427968,58010370048,2281,0.194468,121.777,298303 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_128x32_64x6_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,128,32,64,1,1,1,6,4,1,1,16,8,32,80,1024,25427968,58010370048,2281,0.278948,84.8963,207961 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_256x128_128x3_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,256,128,128,1,1,1,3,4,2,1,16,8,32,80,1024,25427968,58010370048,2281,0.166451,142.274,348513 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_128x256_128x3_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,128,256,128,1,1,1,3,2,4,1,16,8,32,80,1024,25427968,58010370048,2281,0.215173,110.059,269599 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_256x64_128x4_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,256,64,128,1,1,1,4,4,1,1,16,8,32,80,1024,25427968,58010370048,2281,0.198574,119.258,292135 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_64x256_128x4_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,64,256,128,1,1,1,4,1,4,1,16,8,32,80,1024,25427968,58010370048,2281,0.237681,99.6364,244069 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_256x32_128x4_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,256,32,128,1,1,1,4,4,1,1,16,8,32,80,1024,25427968,58010370048,2281,0.258499,91.6123,224413 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_32x256_128x4_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,32,256,128,1,1,1,4,1,4,1,16,8,32,80,1024,25427968,58010370048,2281,0.274616,86.2354,211242 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_128x128_128x4_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,128,128,128,1,1,1,4,2,2,1,16,8,32,80,1024,25427968,58010370048,2281,0.194836,121.546,297739 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_64x128_128x3_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,64,128,128,1,1,1,3,2,2,1,16,8,32,80,1024,25427968,58010370048,2281,0.181146,130.733,320242 1,CUTLASS,gemm,cutlass_tensorop_s8_i16832gemm_s8_s4_128x32_128x4_tn_align32,incorrect,success,universal,3456,4096,2048,s8:row,s4:column,s8:column,s8:column,1,0,serial,1,1,heuristic,1,tensorop,s32,128,32,128,1,1,1,4,4,1,1,16,8,32,80,1024,25427968,58010370048,2281,0.270008,87.7071,214847