@@ -1072,12 +1072,11 @@ define i128 @v_mul_i128(i128 %num, i128 %den) {
1072
1072
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1073
1073
; GFX12-NEXT: v_mov_b32_e32 v2, v11
1074
1074
; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], vcc_lo, v8, v5, v[1:2]
1075
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1 ) | instid1(VALU_DEP_4 )
1075
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4 ) | instid1(VALU_DEP_1 )
1076
1076
; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], s0, v9, v4, v[1:2]
1077
1077
; GFX12-NEXT: s_wait_alu 0xf1ff
1078
1078
; GFX12-NEXT: v_add_co_ci_u32_e64 v7, s0, v12, v7, s0
1079
1079
; GFX12-NEXT: s_wait_alu 0xfffd
1080
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1081
1080
; GFX12-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v7, v6, vcc_lo
1082
1081
; GFX12-NEXT: v_mad_co_u64_u32 v[5:6], null, v10, v5, v[6:7]
1083
1082
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -2436,39 +2435,33 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
2436
2435
; GFX12-NEXT: v_mad_co_u64_u32 v[18:19], null, v16, v12, 0
2437
2436
; GFX12-NEXT: v_mul_lo_u32 v30, v17, v14
2438
2437
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v17, v13, v[0:1]
2439
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2438
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
2440
2439
; GFX12-NEXT: v_mad_co_u64_u32 v[18:19], s0, v17, v11, v[18:19]
2441
2440
; GFX12-NEXT: s_wait_alu 0xf1ff
2442
2441
; GFX12-NEXT: v_cndmask_b32_e64 v20, 0, 1, s0
2443
2442
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v2, v12, v[0:1]
2444
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
2445
2443
; GFX12-NEXT: v_mad_co_u64_u32 v[18:19], vcc_lo, v2, v10, v[18:19]
2446
2444
; GFX12-NEXT: s_wait_alu 0xfffd
2447
2445
; GFX12-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v20, vcc_lo
2448
2446
; GFX12-NEXT: v_mad_co_u64_u32 v[20:21], null, v16, v10, 0
2449
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2450
2447
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v3, v11, v[0:1]
2451
2448
; GFX12-NEXT: v_mad_co_u64_u32 v[18:19], vcc_lo, v3, v9, v[18:19]
2452
2449
; GFX12-NEXT: s_wait_alu 0xfffd
2453
2450
; GFX12-NEXT: v_add_co_ci_u32_e32 v24, vcc_lo, 0, v22, vcc_lo
2454
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2455
2451
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v4, v10, v[0:1]
2456
2452
; GFX12-NEXT: v_mad_co_u64_u32 v[18:19], vcc_lo, v4, v8, v[18:19]
2457
2453
; GFX12-NEXT: s_wait_alu 0xfffd
2458
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2459
2454
; GFX12-NEXT: v_add_co_ci_u32_e32 v26, vcc_lo, 0, v24, vcc_lo
2460
2455
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v5, v9, v[0:1]
2461
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
2456
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
2462
2457
; GFX12-NEXT: v_mad_co_u64_u32 v[22:23], null, v6, v8, v[0:1]
2463
2458
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], s0, v17, v9, v[20:21]
2464
2459
; GFX12-NEXT: s_wait_alu 0xf1ff
2465
2460
; GFX12-NEXT: v_cndmask_b32_e64 v25, 0, 1, s0
2466
2461
; GFX12-NEXT: v_mov_b32_e32 v20, v22
2467
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
2468
2462
; GFX12-NEXT: v_mad_co_u64_u32 v[21:22], vcc_lo, v2, v8, v[0:1]
2469
2463
; GFX12-NEXT: s_wait_alu 0xfffd
2470
2464
; GFX12-NEXT: v_add_co_ci_u32_e32 v29, vcc_lo, 0, v25, vcc_lo
2471
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2472
2465
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], s0, v16, v13, v[19:20]
2473
2466
; GFX12-NEXT: v_mov_b32_e32 v19, v22
2474
2467
; GFX12-NEXT: v_mul_lo_u32 v22, v16, v15
@@ -2490,7 +2483,6 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
2490
2483
; GFX12-NEXT: s_wait_alu 0xf1ff
2491
2484
; GFX12-NEXT: v_add_co_ci_u32_e64 v6, s2, 0, v6, s2
2492
2485
; GFX12-NEXT: v_mov_b32_e32 v14, v21
2493
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
2494
2486
; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], s2, v2, v9, v[11:12]
2495
2487
; GFX12-NEXT: s_wait_alu 0xf1ff
2496
2488
; GFX12-NEXT: v_add_co_ci_u32_e64 v6, s2, 0, v6, s2
@@ -2504,7 +2496,6 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
2504
2496
; GFX12-NEXT: v_mad_co_u64_u32 v[5:6], s4, v5, v8, v[10:11]
2505
2497
; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], s5, v17, v8, v[12:13]
2506
2498
; GFX12-NEXT: s_wait_alu 0xf1ff
2507
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
2508
2499
; GFX12-NEXT: v_add_co_ci_u32_e64 v3, s5, v9, v3, s5
2509
2500
; GFX12-NEXT: s_wait_alu 0xf1ff
2510
2501
; GFX12-NEXT: v_add_co_ci_u32_e64 v4, s5, v29, v4, s5
@@ -2521,10 +2512,9 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
2521
2512
; GFX12-NEXT: v_add_co_ci_u32_e64 v9, s2, v9, v25, s3
2522
2513
; GFX12-NEXT: v_add_co_ci_u32_e64 v9, s1, v9, v20, s1
2523
2514
; GFX12-NEXT: s_wait_alu 0xfffd
2524
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2525
2515
; GFX12-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v9, v28, vcc_lo
2516
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2526
2517
; GFX12-NEXT: v_add_co_ci_u32_e64 v9, vcc_lo, v9, v27, s0
2527
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
2528
2518
; GFX12-NEXT: v_mad_co_u64_u32 v[7:8], null, v7, v8, v[9:10]
2529
2519
; GFX12-NEXT: s_wait_alu 0xf1fd
2530
2520
; GFX12-NEXT: s_setpc_b64 s[30:31]
0 commit comments