diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 0c7e20fc1ebf3..4ab8144b45b74 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -2169,9 +2169,7 @@ defm V_MAX_F16 : VOP2_Real_gfx10<0x039>; defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>; defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>; -let IsSingle = 1 in { - defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; -} +defm V_PK_FMAC_F16 : VOP2_Real_gfx10<0x03c>; // VOP2 no carry-in, carry-out. defm V_ADD_NC_U32 : @@ -2560,6 +2558,8 @@ defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_s defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>; defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>; defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>; + +defm V_PK_FMAC_F16 : VOP2_Real_e32e64_gfx9<0x03c>; } // End AssemblerPredicate = isGFX9Only defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s index 3dcf288bbbaa5..bbd36a9140b96 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s @@ -13185,3 +13185,15 @@ v_pk_fmac_f16 v5, -4.0, v2 v_pk_fmac_f16 v5, v1, v255 // GFX10: encoding: [0x01,0xff,0x0b,0x78] + +v_pk_fmac_f16 v5, v1, v2 +// GFX10: encoding: [0x01,0x05,0x0a,0x78] + +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff] + +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00] + +v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX10: encoding: [0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s new file mode 100644 index 0000000000000..f7dab2d0359dc --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s @@ -0,0 +1,13 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx90a -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx942 -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s + +v_pk_fmac_f16 v5, v1, v2 +// CHECK-MI: [0x01,0x05,0x0a,0x78] + +v_pk_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// CHECK-MI: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff] + +v_pk_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK-MI: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/literalv216.s b/llvm/test/MC/AMDGPU/literalv216.s index c695bc3600c38..f5afaa6bd6181 100644 --- a/llvm/test/MC/AMDGPU/literalv216.s +++ b/llvm/test/MC/AMDGPU/literalv216.s @@ -291,4 +291,4 @@ v_pk_add_u16 v5, v1, 123456.0 // FIXME: v_pk_fmac_f16 cannot be promoted to VOP3 so '_e32' suffix is not valid v_pk_fmac_f16 v5, 0x12345678, v2 // NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX10: v_pk_fmac_f16 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12] +// GFX10: v_pk_fmac_f16_e32 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2.txt index fb1099d709940..b6cb64fad9d84 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2.txt @@ -1779,52 +1779,52 @@ # GFX10: v_or_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x38] 0x6a,0x04,0x0a,0x38 -# GFX10: v_pk_fmac_f16 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x79] +# GFX10: v_pk_fmac_f16_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x79] 0x01,0x05,0xfe,0x79 -# GFX10: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78] 0xc1,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16 v5, -4.0, v2 ; encoding: [0xf7,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, -4.0, v2 ; encoding: [0xf7,0x04,0x0a,0x78] 0xf7,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16 v5, 0, v2 ; encoding: [0x80,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, 0, v2 ; encoding: [0x80,0x04,0x0a,0x78] 0x80,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78] 0xf0,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78] 0x7f,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78] 0x7e,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16 v5, m0, v2 ; encoding: [0x7c,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, m0, v2 ; encoding: [0x7c,0x04,0x0a,0x78] 0x7c,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78] 0x01,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16 v5, s103, v2 ; encoding: [0x67,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, s103, v2 ; encoding: [0x67,0x04,0x0a,0x78] 0x67,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16 v5, ttmp11, v2 ; encoding: [0x77,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, ttmp11, v2 ; encoding: [0x77,0x04,0x0a,0x78] 0x77,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] 0x01,0x05,0x0a,0x78 -# GFX10: v_pk_fmac_f16 v5, v1, v255 ; encoding: [0x01,0xff,0x0b,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, v1, v255 ; encoding: [0x01,0xff,0x0b,0x78] 0x01,0xff,0x0b,0x78 -# GFX10: v_pk_fmac_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78] 0xff,0x05,0x0a,0x78 -# GFX10: v_pk_fmac_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78] 0x6b,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78] 0x6a,0x04,0x0a,0x78 # W32: v_sub_co_ci_u32_e32 v255, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0xfe,0x53] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt index 1774efa4a65c7..4a7471e6c6f98 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt @@ -2476,3 +2476,10 @@ # W32: v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff] # W64: v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff] 0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00 + diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt index 40b8f24e4d72f..233f93a5b8e7d 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt @@ -222,3 +222,6 @@ # W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] # W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] 0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05] +0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt index a022c79fe97e6..97c81ed1a629a 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt @@ -144,5 +144,5 @@ # Packed VOP2 #===----------------------------------------------------------------------===// -# GFX10: v_pk_fmac_f16 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12] +# GFX10: v_pk_fmac_f16_e32 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12] 0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt new file mode 100644 index 0000000000000..ac1ef4baa9aa4 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt @@ -0,0 +1,10 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx908 -disassemble -show-encoding < %s | FileCheck -check-prefix=CHECK-MI %s + +# CHECK-MI: v_pk_fmac_f16_e32 v5, v1, v2 +0x01,0x05,0x0a,0x78 + +# CHECK-MI: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff + +# CHECK-MI: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00