From 16db13131fa66da2c2cb7571d08c05b59183b3dd Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Tue, 17 Oct 2023 15:23:35 -0700
Subject: [PATCH 1/3] [AMDGPU] Add legality check when folding short 64-bit literals

We can only fold it if it fits into 32 bits. I believe this has not
triggered yet because we generally do not select 64-bit literals.
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  10 ++
 .../AMDGPU/fold-short-64-bit-literals.mir     | 101 ++++++++++++++++++
 2 files changed, 111 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2ad07550c7639..e01ca73c135c5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5490,6 +5490,16 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
     return true;
   }
 
+  if (MO->isImm()) {
+    uint64_t Imm = MO->getImm();
+    bool Is64BitFPOp = OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_FP64;
+    bool Is64BitOp = Is64BitFPOp ||
+        OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_INT64;
+    if (Is64BitOp && !AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) &&
+        !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm()))
+      return false;
+  }
+
   // Handle non-register types that are treated like immediates.
   assert(MO->isImm() || MO->isTargetIndex() || MO->isFI() || MO->isGlobal());
 
diff --git a/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir b/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir
new file mode 100644
index 0000000000000..eb74412b18b3a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir
@@ -0,0 +1,101 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass=si-fold-operands -o - %s | FileCheck --check-prefix=GCN %s
+
+---
+name: no_fold_fp_64bit_literal_sgpr
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: no_fold_fp_64bit_literal_sgpr
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+    ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1311768467750121200
+    ; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = V_ADD_F64_e64 0, [[S_MOV_B64_]], 0, [[DEF]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_ADD_F64_e64_]]
+    %0:vreg_64 = IMPLICIT_DEF
+    %1:sreg_64 = S_MOV_B64 1311768467750121200
+    %2:vreg_64 = V_ADD_F64_e64 0, %1, 0, %0, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN_TO_EPILOG %2
+...
+
+---
+name: no_fold_fp_64bit_literal_vgpr
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: no_fold_fp_64bit_literal_vgpr
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+    ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+    ; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = V_ADD_F64_e64 0, [[V_MOV_B]], 0, [[DEF]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_ADD_F64_e64_]]
+    %0:vreg_64 = IMPLICIT_DEF
+    %1:vreg_64 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+    %2:vreg_64 = V_ADD_F64_e64 0, %1, 0, %0, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN_TO_EPILOG %2
+...
+
+---
+name: fold_fp_32bit_literal_sgpr
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_fp_32bit_literal_sgpr
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+    ; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = V_ADD_F64_e64 0, 4636737291354636288, 0, [[DEF]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_ADD_F64_e64_]]
+    %0:vreg_64 = IMPLICIT_DEF
+    %1:sreg_64 = S_MOV_B64 4636737291354636288
+    %2:vreg_64 = V_ADD_F64_e64 0, %1, 0, %0, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN_TO_EPILOG %2
+...
+
+---
+name: no_fold_int_64bit_literal_sgpr
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: no_fold_int_64bit_literal_sgpr
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+    ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1311768467750121200
+    ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[DEF]], [[S_MOV_B64_]], implicit-def $scc
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG [[S_AND_B64_]]
+    %0:sreg_64 = IMPLICIT_DEF
+    %1:sreg_64 = S_MOV_B64 1311768467750121200
+    %2:sreg_64 = S_AND_B64 %0, %1, implicit-def $scc
+    SI_RETURN_TO_EPILOG %2
+...
+
+---
+name: fold_int_32bit_literal_sgpr
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_int_32bit_literal_sgpr
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+    ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[DEF]], 2147483647, implicit-def $scc
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG [[S_AND_B64_]]
+    %0:sreg_64 = IMPLICIT_DEF
+    %1:sreg_64 = S_MOV_B64 2147483647
+    %2:sreg_64 = S_AND_B64 %0, %1, implicit-def $scc
+    SI_RETURN_TO_EPILOG %2
+...
+
+---
+name: fold_uint_32bit_literal_sgpr
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_uint_32bit_literal_sgpr
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+    ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[DEF]], 4294967295, implicit-def $scc
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG [[S_AND_B64_]]
+    %0:sreg_64 = IMPLICIT_DEF
+    %1:sreg_64 = S_MOV_B64 4294967295
+    %2:sreg_64 = S_AND_B64 %0, %1, implicit-def $scc
+    SI_RETURN_TO_EPILOG %2
+...
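The encodability rule behind the new isOperandLegal() check can be sketched as a
standalone C++ program. This is an assumed mirror of AMDGPU::isValid32BitLiteral,
not the in-tree code: a 64-bit FP operand can take a 32-bit literal only when the
low 32 bits of the value are zero (the literal supplies the high half), while a
64-bit integer operand requires the value to fit in an unsigned 32-bit literal.
The real check in the patch additionally accepts inline constants via
AMDGPU::isInlinableLiteral64. The constants below are the ones used in the tests
above.

#include <cassert>
#include <cstdint>

// Sketch of the assumed encodability rule (mirroring
// AMDGPU::isValid32BitLiteral): a 64-bit FP operand takes its 32-bit
// literal in the high half, so the low 32 bits must be zero; a 64-bit
// integer operand takes a zero-extended 32-bit literal, so the value
// must fit in 32 bits.
static bool isValid32BitLiteralSketch(uint64_t Val, bool IsFP64) {
  return IsFP64 ? (Val & 0xffffffffull) == 0 : Val <= 0xffffffffull;
}

int main() {
  // 1311768467750121200 == 0x123456789abcdef0: needs a true 64-bit
  // literal, so the no_fold_* tests keep their S_MOV_B64 / V_MOV_B64_PSEUDO.
  assert(!isValid32BitLiteralSketch(0x123456789abcdef0ull, /*IsFP64=*/true));
  assert(!isValid32BitLiteralSketch(0x123456789abcdef0ull, /*IsFP64=*/false));

  // 4636737291354636288 == 0x4059000000000000 (double 100.0): the low
  // half is zero, so fold_fp_32bit_literal_sgpr folds it into V_ADD_F64.
  assert(isValid32BitLiteralSketch(0x4059000000000000ull, /*IsFP64=*/true));

  // 2147483647 and 4294967295 fit in 32 bits, so both fold into S_AND_B64.
  assert(isValid32BitLiteralSketch(2147483647ull, /*IsFP64=*/false));
  assert(isValid32BitLiteralSketch(4294967295ull, /*IsFP64=*/false));
  return 0;
}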
From 1a3c5d39d3451816d568506112239b2018ce51b0 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Tue, 17 Oct 2023 16:08:46 -0700
Subject: [PATCH 2/3] Added packed types which also have 64-bit operands

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  4 +++-
 .../AMDGPU/fold-short-64-bit-literals.mir     | 24 +++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e01ca73c135c5..e1193e18d95b5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5494,7 +5494,9 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
     uint64_t Imm = MO->getImm();
     bool Is64BitFPOp = OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_FP64;
     bool Is64BitOp = Is64BitFPOp ||
-        OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_INT64;
+        OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
+        OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2INT32 ||
+        OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
     if (Is64BitOp && !AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) &&
         !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm()))
       return false;
diff --git a/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir b/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir
index eb74412b18b3a..328ee991da8f4 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir
@@ -99,3 +99,27 @@ body: |
     %2:sreg_64 = S_AND_B64 %0, %1, implicit-def $scc
     SI_RETURN_TO_EPILOG %2
 ...
+
+---
+name: no_fold_v2fp_64bit_literal_sgpr
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    %0:vreg_64 = IMPLICIT_DEF
+    %1:vreg_64 = V_MOV_B64_PSEUDO 4629700418019000320, implicit $exec
+    %2:vreg_64 = V_PK_ADD_F32 0, %0, 0, %1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN_TO_EPILOG %2
+...
+
+---
+name: fold_v2fp_32bit_literal_sgpr
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    %0:vreg_64 = IMPLICIT_DEF
+    %1:vreg_64 = V_MOV_B64_PSEUDO 1065353216, implicit $exec
+    %2:vreg_64 = V_PK_ADD_F32 0, %0, 0, %1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN_TO_EPILOG %2
+...

From 7c04b2b7229ec052308f26c3a8b539aac09c65d4 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Tue, 17 Oct 2023 17:13:36 -0700
Subject: [PATCH 3/3] Clang-format

I like to move it, move it!
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e1193e18d95b5..f439d4e4b8765 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5494,9 +5494,9 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
     uint64_t Imm = MO->getImm();
     bool Is64BitFPOp = OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_FP64;
     bool Is64BitOp = Is64BitFPOp ||
-        OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
-        OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2INT32 ||
-        OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
+                     OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
+                     OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2INT32 ||
+                     OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
     if (Is64BitOp && !AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) &&
         !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm()))
       return false;
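For the packed operands added in the second patch, the same integer-style rule
applies: OPERAND_REG_IMM_V2INT32 and OPERAND_REG_IMM_V2FP32 operands are 64 bits
wide with one 32-bit lane per half, and Is64BitFPOp stays false for them, so the
literal must fit in 32 bits. The following hedged sketch (the lane layout, with
lane 0 in the low half, is an assumption) decodes the two test immediates to show
why one folds and the other does not.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Reinterpret 32 bits as one f32 lane of a packed pair.
static float asF32(uint32_t Bits) {
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

int main() {
  // 4629700418019000320 == 0x4040000040800000: lanes <4.0f, 3.0f>. Both
  // halves are nonzero, so the literal cannot shrink to 32 bits and
  // no_fold_v2fp_64bit_literal_sgpr keeps its V_MOV_B64_PSEUDO.
  // 1065353216 == 0x000000003f800000: lanes <1.0f, 0.0f>. The value fits
  // in 32 bits, so fold_v2fp_32bit_literal_sgpr can fold it.
  const uint64_t Imms[] = {4629700418019000320ull, 1065353216ull};
  for (uint64_t Imm : Imms)
    std::printf("imm=0x%016llx lane0=%g lane1=%g fits32=%s\n",
                (unsigned long long)Imm, asF32((uint32_t)Imm),
                asF32((uint32_t)(Imm >> 32)),
                Imm <= 0xffffffffull ? "yes" : "no");
  return 0;
}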