@@ -1499,60 +1499,13 @@ define amdgpu_kernel void @fptrunc_f64_to_f16_afn(
14991499; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
15001500; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
15011501; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
1502- ; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
1503- ; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0
1504- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v1
1505- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff
1506- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8
1507- ; GFX1250-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s3, v0
1508- ; GFX1250-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
1509- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
1510- ; GFX1250-SDAG-FAKE16-NEXT: s_sub_co_i32 s4, 0x3f1, s3
1511- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
1512- ; GFX1250-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
1513- ; GFX1250-SDAG-FAKE16-NEXT: v_med3_i32 v1, s4, 0, 13
1514- ; GFX1250-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1515- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v1
1516- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
1517- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v0
1518- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4
1519- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000
1520- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1521- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8
1522- ; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8
1523- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
1524- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5
1525- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
1526- ; GFX1250-SDAG-FAKE16-NEXT: s_addk_co_i32 s3, 0xfc10
1527- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5
1528- ; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12
1529- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8
1530- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1
1531- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8
1532- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1533- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7
1534- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5
1535- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0
1536- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3
1537- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
1538- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2
1539- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9
1540- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1541- ; GFX1250-SDAG-FAKE16-NEXT: s_add_co_i32 s5, s5, s8
1542- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31
1543- ; GFX1250-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00
1544- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
1545- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0
1546- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s4, s8, 0x7c00
1547- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s3, 0x40f
1548- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s4, s5
1549- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16
15501502; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
1551- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000
1503+ ; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
15521504; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
1553- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3
1554- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1555- ; GFX1250-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2
1505+ ; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0
1506+ ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
1507+ ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1508+ ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
15561509; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
15571510; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
15581511;
@@ -3538,109 +3491,14 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16_afn(
35383491; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
35393492; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
35403493; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
3494+ ; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
35413495; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], null
3542- ; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0
3543- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v3
3544- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff
3545- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8
3546- ; GFX1250-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, s3, v2
3547- ; GFX1250-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
3548- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
3549- ; GFX1250-SDAG-FAKE16-NEXT: s_sub_co_i32 s4, 0x3f1, s3
3550- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
3551- ; GFX1250-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
3552- ; GFX1250-SDAG-FAKE16-NEXT: v_med3_i32 v3, s4, 0, 13
3553- ; GFX1250-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
3554- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v3
3555- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
3556- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v2
3557- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4
3558- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000
3559- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3560- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8
3561- ; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8
3562- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
3563- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5
3564- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
3565- ; GFX1250-SDAG-FAKE16-NEXT: s_addk_co_i32 s3, 0xfc10
3566- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5
3567- ; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12
3568- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8
3569- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1
3570- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8
3571- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3572- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7
3573- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5
3574- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0
3575- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3
3576- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
3577- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2
3578- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9
3579- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3580- ; GFX1250-SDAG-FAKE16-NEXT: s_add_co_i32 s5, s5, s8
3581- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31
3582- ; GFX1250-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00
3583- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
3584- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0
3585- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v1
3586- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s9, s8, 0x7c00
3587- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s3, 0x40f
3588- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s9, s5
3589- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s5, s4, 0x1ff
3590- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s10, s4, 8
3591- ; GFX1250-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s5, v0
3592- ; GFX1250-SDAG-FAKE16-NEXT: s_bfe_u32 s5, s4, 0xb0014
3593- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s10, s10, 0xffe
3594- ; GFX1250-SDAG-FAKE16-NEXT: s_sub_co_i32 s9, 0x3f1, s5
3595- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16
3596- ; GFX1250-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
3597- ; GFX1250-SDAG-FAKE16-NEXT: v_med3_i32 v1, s9, 0, 13
3598- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000
3599- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3600- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3
3601- ; GFX1250-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
3602- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s11, v1
3603- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
3604- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s9, v0
3605- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s9, s10, s9
3606- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, 0x1000
3607- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3608- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s12, s10, s11
3609- ; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s11, s12, s11
3610- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
3611- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s11, s10
3612- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, 1, 0
3613- ; GFX1250-SDAG-FAKE16-NEXT: s_addk_co_i32 s5, 0xfc10
3614- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s3, s12, s3
3615- ; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s10, s5, 12
3616- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, s10
3617- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 1
3618- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, s10
3619- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3620- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s10, s3, 7
3621- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s10, 5
3622- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s11, 1, 0
3623- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s10, 3
3624- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s10, 1, 0
3625- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s3, s3, 2
3626- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s10, s10, s11
3627- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3628- ; GFX1250-SDAG-FAKE16-NEXT: s_add_co_i32 s3, s3, s10
3629- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 31
3630- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, 0x7c00
3631- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s9, 0
3632- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s8, s8, 0x7c00
3633- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s5, 0x40f
36343496; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
3635- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s8, s3
3636- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s4, s4, 16
3637- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3638- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s4, s4, 0x8000
3639- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s3, s4, s3
3640- ; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
3641- ; GFX1250-SDAG-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s3, s2
3642- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3643- ; GFX1250-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2
3497+ ; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0
3498+ ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
3499+ ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
3500+ ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
3501+ ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_f32 v0, v0, v2
36443502; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
36453503; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
36463504;
0 commit comments