Skip to content

Commit

Permalink
[AArch64][GlobalISel] Improve and expand fcopysign lowering (llvm#71283)
Browse files Browse the repository at this point in the history
This alters the lowering of G_FCOPYSIGN to support vector types. The
general idea is that we just lower it to vector operations using and/or
and a mask, which are now converted to a BIF/BIT/BSP.

In the process the existing AArch64LegalizerInfo::legalizeFCopySign can
be removed, relying on expanding the scalar versions to vector instead,
which just needs a small adjustment to allow widening scalars to
vectors.
  • Loading branch information
davemgreen authored Feb 17, 2024
1 parent ccc20b4 commit 3a77522
Show file tree
Hide file tree
Showing 7 changed files with 454 additions and 419 deletions.
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5210,6 +5210,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_FCOPYSIGN:
case TargetOpcode::G_UADDSAT:
case TargetOpcode::G_USUBSAT:
case TargetOpcode::G_SADDSAT:
Expand Down
15 changes: 10 additions & 5 deletions llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,14 +269,19 @@ MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res,
LLT ResTy = Res.getLLTTy(*getMRI());
LLT Op0Ty = Op0.getLLTTy(*getMRI());

assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
assert(Op0Ty.isVector() && "Non vector type");
assert(((ResTy.isScalar() && (ResTy == Op0Ty.getElementType())) ||
(ResTy.isVector() &&
(ResTy.getElementType() == Op0Ty.getElementType()))) &&
"Different vector element types");
assert((ResTy.getNumElements() < Op0Ty.getNumElements()) &&
"Op0 has fewer elements");
assert(
(ResTy.isScalar() || (ResTy.getNumElements() < Op0Ty.getNumElements())) &&
"Op0 has fewer elements");

SmallVector<Register, 8> Regs;
auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
if (ResTy.isScalar())
return buildCopy(Res, Unmerge.getReg(0));
SmallVector<Register, 8> Regs;
for (unsigned i = 0; i < ResTy.getNumElements(); ++i)
Regs.push_back(Unmerge.getReg(i));
return buildMergeLikeInstr(Res, Regs);
Expand Down
73 changes: 8 additions & 65 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1157,10 +1157,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
.legalFor({{s64, s32}, {s64, s64}});

// TODO: Custom legalization for vector types.
// TODO: Custom legalization for mismatched types.
// TODO: s16 support.
getActionDefinitionsBuilder(G_FCOPYSIGN).customFor({{s32, s32}, {s64, s64}});
getActionDefinitionsBuilder(G_FCOPYSIGN)
.moreElementsIf(
[](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
[=](const LegalityQuery &Query) {
const LLT Ty = Query.Types[0];
return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
})
.lower();

getActionDefinitionsBuilder(G_FMAD).lower();

Expand Down Expand Up @@ -1217,8 +1222,6 @@ bool AArch64LegalizerInfo::legalizeCustom(
case TargetOpcode::G_MEMMOVE:
case TargetOpcode::G_MEMSET:
return legalizeMemOps(MI, Helper);
case TargetOpcode::G_FCOPYSIGN:
return legalizeFCopySign(MI, Helper);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
return legalizeExtractVectorElt(MI, MRI, Helper);
case TargetOpcode::G_DYN_STACKALLOC:
Expand Down Expand Up @@ -1960,66 +1963,6 @@ bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
return false;
}

/// Custom legalization of a scalar G_FCOPYSIGN whose destination and sign
/// operand share the same 32-bit or 64-bit type.
///
/// Both inputs are placed in lane 0 of a 128-bit vector, a per-lane mask is
/// built, and a G_BSP picks between the two inputs under that mask. Lane 0 of
/// the G_BSP result is then unmerged straight into the original destination
/// register.
///
/// \param MI     The G_FCOPYSIGN instruction; it is erased before returning.
/// \param Helper Supplies the MachineIRBuilder used to emit the replacement.
/// \returns true once the replacement sequence has been emitted.
bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
                                             LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  assert(DstTy.isScalar() && "Only expected scalars right now!");
  const unsigned DstSize = DstTy.getSizeInBits();
  assert((DstSize == 32 || DstSize == 64) && "Unexpected dst type!");
  assert(MRI.getType(MI.getOperand(2).getReg()) == DstTy &&
         "Expected homogeneous types!");

  // Pick the lane count that fills 128 bits and the per-lane constant the
  // mask starts from; the goal is a mask with the high bit of each lane set.
  // TODO: s16 support.
  unsigned LaneCount;
  uint64_t LaneMaskBits;
  if (DstSize == 64) {
    // AdvSIMD immediate moves cannot materialize our mask in a single
    // instruction for 64-bit elements. Instead, materialize zero and then
    // negate it.
    LaneCount = 2;
    LaneMaskBits = 0;
  } else if (DstSize == 32) {
    LaneCount = 4;
    LaneMaskBits = 0x80000000ULL;
  } else {
    llvm_unreachable("Unexpected type for G_FCOPYSIGN!");
  }
  const LLT WideTy = LLT::fixed_vector(LaneCount, DstTy);

  // Widen both inputs to 128 bits by inserting them at index 0 of an
  // undefined vector. We want these to eventually become INSERT_SUBREGs.
  auto UndefVec = MIRBuilder.buildUndef(WideTy);
  auto LaneZero = MIRBuilder.buildConstant(DstTy, 0);
  auto WideMagnitude = MIRBuilder.buildInsertVectorElement(
      WideTy, UndefVec, MI.getOperand(1).getReg(), LaneZero);
  auto WideSign = MIRBuilder.buildInsertVectorElement(
      WideTy, UndefVec, MI.getOperand(2).getReg(), LaneZero);

  // Construct the mask; the 64-bit form is obtained by negating the zero
  // vector.
  auto SignMask = MIRBuilder.buildConstant(WideTy, LaneMaskBits);
  if (DstSize == 64)
    SignMask = MIRBuilder.buildFNeg(WideTy, SignMask);

  auto Selected = MIRBuilder.buildInstr(AArch64::G_BSP, {WideTy},
                                        {SignMask, WideSign, WideMagnitude});

  // Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination;
  // the remaining lanes go to fresh virtual registers. We want this to
  // eventually become an EXTRACT_SUBREG.
  SmallVector<Register, 2> LaneRegs;
  LaneRegs.push_back(Dst);
  while (LaneRegs.size() < WideTy.getNumElements())
    LaneRegs.push_back(MRI.createGenericVirtualRegister(DstTy));
  MIRBuilder.buildUnmerge(LaneRegs, Selected);

  MI.eraseFromParent();
  return true;
}

bool AArch64LegalizerInfo::legalizeExtractVectorElt(
MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ class AArch64LegalizerInfo : public LegalizerInfo {
LegalizerHelper &Helper) const;
bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeFCopySign(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
bool legalizeDynStackAlloc(MachineInstr &MI, LegalizerHelper &Helper) const;
Expand Down
40 changes: 24 additions & 16 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,18 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %val:_(s32) = COPY $s0
; CHECK-NEXT: %sign:_(s32) = COPY $s1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s32), [[C]](s32)
; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s32), [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
; CHECK-NEXT: [[BSP:%[0-9]+]]:_(<4 x s32>) = G_BSP [[BUILD_VECTOR]], [[IVEC1]], [[IVEC]]
; CHECK-NEXT: %fcopysign:_(s32), %10:_(s32), %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES [[BSP]](<4 x s32>)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %val(s32), [[DEF]](s32)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %sign(s32), [[DEF]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>)
; CHECK-NEXT: %fcopysign:_(s32) = COPY [[UV]](s32)
; CHECK-NEXT: $s0 = COPY %fcopysign(s32)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%val:_(s32) = COPY $s0
Expand All @@ -41,14 +45,18 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %val:_(s64) = COPY $d0
; CHECK-NEXT: %sign:_(s64) = COPY $d1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s64), [[C]](s64)
; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s64), [[C]](s64)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG [[BUILD_VECTOR]]
; CHECK-NEXT: [[BSP:%[0-9]+]]:_(<2 x s64>) = G_BSP [[FNEG]], [[IVEC1]], [[IVEC]]
; CHECK-NEXT: %fcopysign:_(s64), %10:_(s64) = G_UNMERGE_VALUES [[BSP]](<2 x s64>)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %val(s64), [[DEF]](s64)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %sign(s64), [[DEF]](s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND]], [[AND1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[OR]](<2 x s64>)
; CHECK-NEXT: %fcopysign:_(s64) = COPY [[UV]](s64)
; CHECK-NEXT: $d0 = COPY %fcopysign(s64)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%val:_(s64) = COPY $d0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -526,8 +526,8 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FCOPYSIGN (opcode {{[0-9]+}}): 2 type indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_IS_FPCLASS (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
Expand Down
Loading

0 comments on commit 3a77522

Please sign in to comment.