diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index a17bd1b14f412..7980ddb26491b 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -22,6 +22,7 @@ #include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/Optional.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" @@ -2701,8 +2702,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_ZEXTLOAD: case TargetOpcode::G_LOAD: case TargetOpcode::G_STORE: { + GLoadStore &LdSt = cast<GLoadStore>(I); bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; - LLT PtrTy = MRI.getType(I.getOperand(1).getReg()); + LLT PtrTy = MRI.getType(LdSt.getPointerReg()); if (PtrTy != LLT::pointer(0, 64)) { LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy @@ -2710,20 +2712,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return false; } - auto &MemOp = **I.memoperands_begin(); - uint64_t MemSizeInBytes = MemOp.getSize(); - unsigned MemSizeInBits = MemSizeInBytes * 8; - AtomicOrdering Order = MemOp.getSuccessOrdering(); + uint64_t MemSizeInBytes = LdSt.getMemSize(); + unsigned MemSizeInBits = LdSt.getMemSizeInBits(); + AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); // Need special instructions for atomics that affect ordering. 
if (Order != AtomicOrdering::NotAtomic && Order != AtomicOrdering::Unordered && Order != AtomicOrdering::Monotonic) { - assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD); + assert(!isa<GZExtLoad>(LdSt)); if (MemSizeInBytes > 64) return false; - if (I.getOpcode() == TargetOpcode::G_LOAD) { + if (isa<GLoad>(LdSt)) { static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX}; I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)])); @@ -2737,7 +2738,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } #ifndef NDEBUG - const Register PtrReg = I.getOperand(1).getReg(); + const Register PtrReg = LdSt.getPointerReg(); const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); // Sanity-check the pointer register. assert(PtrRB.getID() == AArch64::GPRRegBankID && @@ -2746,13 +2747,31 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { "Load/Store pointer operand isn't a pointer"); #endif - const Register ValReg = I.getOperand(0).getReg(); + const Register ValReg = LdSt.getReg(0); + const LLT ValTy = MRI.getType(ValReg); const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); + // The code below doesn't support truncating stores, so we need to split it + // again. + if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { + unsigned SubReg; + LLT MemTy = LdSt.getMMO().getMemoryType(); + auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI); + if (!getSubRegForClass(RC, TRI, SubReg)) + return false; + + // Generate a subreg copy. + auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {}) + .addReg(ValReg, 0, SubReg) + .getReg(0); + RBI.constrainGenericRegister(Copy, *RC, MRI); + LdSt.getOperand(0).setReg(Copy); + } + // Helper lambda for partially selecting I. Either returns the original // instruction with an updated opcode, or a new instruction. 
auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { - bool IsStore = I.getOpcode() == TargetOpcode::G_STORE; + bool IsStore = isa<GStore>(I); const unsigned NewOpc = selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); if (NewOpc == I.getOpcode()) @@ -2769,7 +2788,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // Folded something. Create a new instruction and return it. auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); - IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg); + Register CurValReg = I.getOperand(0).getReg(); + IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg); NewInst.cloneMemRefs(I); for (auto &Fn : *AddrModeFns) Fn(NewInst); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-store-truncating-float.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-store-truncating-float.mir new file mode 100644 index 0000000000000..3e06016f4af53 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-store-truncating-float.mir @@ -0,0 +1,116 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +--- | + define void @truncating_f32(double %x) { + %alloca = alloca i32, align 4 + %bitcast = bitcast double %x to i64 + %trunc = trunc i64 %bitcast to i32 + store i32 %trunc, i32* %alloca, align 4 + ret void + } + + define void @truncating_f16(double %x) { + %alloca = alloca i16, align 2 + %bitcast = bitcast double %x to i64 + %trunc = trunc i64 %bitcast to i16 + store i16 %trunc, i16* %alloca, align 2 + ret void + } + + define void @truncating_f8(double %x) { + %alloca = alloca i8, align 1 + %bitcast = bitcast double %x to i64 + %trunc = trunc i64 %bitcast to i8 + store i8 %trunc, i8* %alloca, align 1 + ret void + } + +... 
+--- +name: truncating_f32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, name: alloca, size: 4, alignment: 4 } +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $d0 + + ; CHECK-LABEL: name: truncating_f32 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub + ; CHECK: STRSui [[COPY1]], %stack.0.alloca, 0 :: (store (s32) into %ir.alloca) + ; CHECK: RET_ReallyLR + %0:fpr(s64) = COPY $d0 + %1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca + G_STORE %0(s64), %1(p0) :: (store (s32) into %ir.alloca) + RET_ReallyLR + +... +--- +name: truncating_f16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +frameInfo: + maxAlignment: 2 +stack: + - { id: 0, name: alloca, size: 2, alignment: 2 } +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $d0 + + ; CHECK-LABEL: name: truncating_f16 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY [[COPY]].hsub + ; CHECK: STRHui [[COPY1]], %stack.0.alloca, 0 :: (store (s16) into %ir.alloca) + ; CHECK: RET_ReallyLR + %0:fpr(s64) = COPY $d0 + %1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca + G_STORE %0(s64), %1(p0) :: (store (s16) into %ir.alloca) + RET_ReallyLR + +... 
+--- +name: truncating_f8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +frameInfo: + maxAlignment: 1 +stack: + - { id: 0, name: alloca, size: 1, alignment: 1 } +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $d0 + + ; CHECK-LABEL: name: truncating_f8 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY [[COPY]].hsub + ; CHECK: [[COPY2:%[0-9]+]]:fpr8 = COPY [[COPY1]] + ; CHECK: STRBui [[COPY2]], %stack.0.alloca, 0 :: (store (s8) into %ir.alloca) + ; CHECK: RET_ReallyLR + %0:fpr(s64) = COPY $d0 + %1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca + G_STORE %0(s64), %1(p0) :: (store (s8) into %ir.alloca) + RET_ReallyLR + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir index e0983dbfdd14c..9d044d18b8c24 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir @@ -278,13 +278,13 @@ body: | ; CHECK-LABEL: name: test_rule96_id2146_at_idx8070 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s1)) + ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8)) ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[LDRBui]] - ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 0 + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 7 ; CHECK: $noreg = PATCHABLE_RET [[UBFMWri]] %2:gpr(p0) = COPY $x0 - %0:fpr(s1) = G_LOAD %2(p0) :: (load (s1)) - %1:gpr(s32) = G_ZEXT %0(s1) + %0:fpr(s8) = G_LOAD %2(p0) :: (load (s8)) + %1:gpr(s32) = G_ZEXT %0(s8) $noreg = PATCHABLE_RET %1(s32) ...