From 421b949f53f69b9790a1e8ff8a092bfc7bc56f28 Mon Sep 17 00:00:00 2001
From: Alessandro Decina
Date: Tue, 4 Jan 2022 05:32:28 +1100
Subject: [PATCH] [SOL] add support for (pseudo) atomics to SBF (#23)

Lower atomic operations to their regular non-atomic equivalents.
Lowering for all operations except atomic fence is done at DAG
legalization time. Fences are removed at instruction emission time.
---
 llvm/lib/Target/BPF/BPFISelLowering.cpp | 193 +++++++++++++++--
 llvm/lib/Target/BPF/BPFISelLowering.h   |   4 +
 llvm/lib/Target/BPF/BPFInstrInfo.td     |   9 +
 llvm/test/CodeGen/BPF/atomics_sbf.ll    | 276 ++++++++++++++++++++++++
 4 files changed, 470 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/CodeGen/BPF/atomics_sbf.ll

diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index cbc2808615932c..54a80496af59d8 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -79,10 +79,30 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
 
-  // Set unsupported atomic operations as Custom so
-  // we can emit better error messages than fatal error
-  // from selectiondag.
-  for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
+  for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
+    if (Subtarget->isSolana()) {
+      // Implement custom lowering for all atomic operations
+      setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
+      setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
+      setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
+      setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
+      setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Custom);
+      setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Custom);
+      setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Custom);
+      setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
+      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+      setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Custom);
+      setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Custom);
+      setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
+      continue;
+    }
+
+    if (VT == MVT::i64) {
+      continue;
+    }
+
+    // Set unsupported atomic operations as Custom so we can emit better error
+    // messages than fatal error from selectiondag.
     if (VT == MVT::i32) {
       if (STI.getHasAlu32())
         continue;
@@ -210,7 +230,17 @@ bool BPFTargetLowering::allowsMisalignedMemoryAccesses(
   return isSolana;
 }
 
-bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+bool BPFTargetLowering::lowerAtomicStoreAsStoreSDNode(
+    const StoreInst &SI) const {
+  return Subtarget->isSolana();
+}
+
+bool BPFTargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
+  return Subtarget->isSolana();
+}
+
+bool BPFTargetLowering::isOffsetFoldingLegal(
+    const GlobalAddressSDNode *GA) const {
   return false;
 }
 
@@ -280,19 +310,31 @@ BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
 }
 
-void BPFTargetLowering::ReplaceNodeResults(
-    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+void BPFTargetLowering::ReplaceNodeResults(SDNode *N,
+                                           SmallVectorImpl<SDValue> &Results,
+                                           SelectionDAG &DAG) const {
   const char *err_msg;
   uint32_t Opcode = N->getOpcode();
   switch (Opcode) {
   default:
     report_fatal_error("Unhandled custom legalization");
+  case ISD::ATOMIC_SWAP:
+  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
   case ISD::ATOMIC_LOAD_ADD:
   case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_NAND:
   case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_LOAD_UMIN:
   case ISD::ATOMIC_LOAD_XOR:
-  case ISD::ATOMIC_SWAP:
-  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+    if (Subtarget->isSolana()) {
+      // We do lowering during legalization, see LowerOperation()
+      return;
+    }
+
     if (HasAlu32 || Opcode == ISD::ATOMIC_LOAD_ADD)
       err_msg = "Unsupported atomic operations, please use 32/64 bit version";
     else
@@ -312,10 +354,23 @@ SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerGlobalAddress(Op, DAG);
   case ISD::SELECT_CC:
     return LowerSELECT_CC(Op, DAG);
+  case ISD::ATOMIC_SWAP:
+  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_XOR:
+    return LowerATOMICRMW(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
     report_fatal_error("Unsupported dynamic stack allocation");
   default:
     llvm_unreachable("unimplemented operand");
@@ -411,7 +466,6 @@ SDValue BPFTargetLowering::LowerFormalArguments(
     fail(DL, DAG, "functions with VarArgs or StructRet are not supported");
   }
 
-
   return Chain;
 }
 
@@ -738,6 +792,114 @@ SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(BPFISD::SELECT_CC, DL, VTs, Ops);
 }
 
+SDValue BPFTargetLowering::LowerATOMICRMW(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  AtomicSDNode *AN = cast<AtomicSDNode>(Op);
+  assert(AN && "Expected custom lowering of an atomic load node");
+
+  SDValue Chain = AN->getChain();
+  SDValue Ptr = AN->getBasePtr();
+  EVT PtrVT = AN->getMemoryVT();
+  EVT RetVT = Op.getValueType();
+
+  // Load the current value
+  SDValue Load =
+      DAG.getExtLoad(ISD::EXTLOAD, DL, RetVT, Chain, Ptr, MachinePointerInfo(),
+                     PtrVT, AN->getAlignment());
+  Chain = Load.getValue(1);
+
+  // Most ops return the current value, except CMP_SWAP_WITH_SUCCESS (see below)
+  SDValue Ret = Load;
+  SDValue RetFlag;
+
+  // Val contains the new value we want to set. For CMP_SWAP, Cmp contains the
+  // expected current value.
+  SDValue Cmp, Val;
+  if (AN->isCompareAndSwap()) {
+    Cmp = Op.getOperand(2);
+    Val = Op.getOperand(3);
+
+    // The Cmp value must match the pointer type
+    EVT CmpVT = Cmp->getValueType(0);
+    if (CmpVT != RetVT) {
+      Cmp = RetVT.bitsGT(CmpVT) ? DAG.getNode(ISD::SIGN_EXTEND, DL, RetVT, Cmp)
+                                : DAG.getNode(ISD::TRUNCATE, DL, RetVT, Cmp);
+    }
+  } else {
+    Val = AN->getVal();
+  }
+
+  // The new value type must match the pointer type
+  EVT ValVT = Val->getValueType(0);
+  if (ValVT != RetVT) {
+    Val = RetVT.bitsGT(ValVT) ? DAG.getNode(ISD::SIGN_EXTEND, DL, RetVT, Val)
+                              : DAG.getNode(ISD::TRUNCATE, DL, RetVT, Val);
+    ValVT = Val->getValueType(0);
+  }
+
+  SDValue NewVal;
+  switch (Op.getOpcode()) {
+  case ISD::ATOMIC_SWAP:
+    NewVal = Val;
+    break;
+  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
+    EVT RetFlagVT = AN->getValueType(1);
+    NewVal = DAG.getSelectCC(DL, Load, Cmp, Val, Load, ISD::SETEQ);
+    RetFlag = DAG.getSelectCC(
+        DL, Load, Cmp, DAG.getBoolConstant(true, DL, RetFlagVT, RetFlagVT),
+        DAG.getBoolConstant(false, DL, RetFlagVT, RetFlagVT), ISD::SETEQ);
+    break;
+  }
+  case ISD::ATOMIC_LOAD_ADD:
+    NewVal = DAG.getNode(ISD::ADD, DL, ValVT, Load, Val);
+    break;
+  case ISD::ATOMIC_LOAD_SUB:
+    NewVal = DAG.getNode(ISD::SUB, DL, ValVT, Load, Val);
+    break;
+  case ISD::ATOMIC_LOAD_AND:
+    NewVal = DAG.getNode(ISD::AND, DL, ValVT, Load, Val);
+    break;
+  case ISD::ATOMIC_LOAD_NAND: {
+    NewVal =
+        DAG.getNOT(DL, DAG.getNode(ISD::AND, DL, ValVT, Load, Val), ValVT);
+    break;
+  }
+  case ISD::ATOMIC_LOAD_OR:
+    NewVal = DAG.getNode(ISD::OR, DL, ValVT, Load, Val);
+    break;
+  case ISD::ATOMIC_LOAD_XOR:
+    NewVal = DAG.getNode(ISD::XOR, DL, ValVT, Load, Val);
+    break;
+  case ISD::ATOMIC_LOAD_MIN:
+    NewVal = DAG.getNode(ISD::SMIN, DL, ValVT, Load, Val);
+    break;
+  case ISD::ATOMIC_LOAD_UMIN:
+    NewVal = DAG.getNode(ISD::UMIN, DL, ValVT, Load, Val);
+    break;
+  case ISD::ATOMIC_LOAD_MAX:
+    NewVal = DAG.getNode(ISD::SMAX, DL, ValVT, Load, Val);
+    break;
+  case ISD::ATOMIC_LOAD_UMAX:
+    NewVal = DAG.getNode(ISD::UMAX, DL, ValVT, Load, Val);
+    break;
+  default:
+    llvm_unreachable("unknown atomicrmw op");
+  }
+
+  Chain =
+      DAG.getTruncStore(Chain, DL, NewVal, Ptr, MachinePointerInfo(), PtrVT);
+
+  if (RetFlag) {
+    // CMP_SWAP_WITH_SUCCESS returns {value, success, chain}
+    Ret = DAG.getMergeValues({Ret, RetFlag, Chain}, DL);
+  } else {
+    // All the other ops return {value, chain}
+    Ret = DAG.getMergeValues({Ret, Chain}, DL);
+  }
+
+  return Ret;
+}
+
 const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((BPFISD::NodeType)Opcode) {
   case BPFISD::FIRST_NUMBER:
@@ -841,6 +1003,7 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                      Opc == BPF::Select_32_64);
 
   bool isMemcpyOp = Opc == BPF::MEMCPY;
+  bool isAtomicFence = Opc == BPF::ATOMIC_FENCE;
 
 #ifndef NDEBUG
   bool isSelectRIOp = (Opc == BPF::Select_Ri ||
                        Opc == BPF::Select_Ri_64_32 ||
                        Opc == BPF::Select_Ri_32 ||
                        Opc == BPF::Select_Ri_32_64);
 
-  assert((isSelectRROp || isSelectRIOp || isMemcpyOp) &&
+  assert((isSelectRROp || isSelectRIOp || isMemcpyOp || isAtomicFence) &&
          "Unexpected instr type to insert");
 #endif
 
   if (isMemcpyOp)
     return EmitInstrWithCustomInserterMemcpy(MI, BB);
 
+  if (isAtomicFence) {
+    // this is currently a nop
+    MI.eraseFromParent();
+    return BB;
+  }
+
   bool is32BitCmp = (Opc == BPF::Select_32 ||
                      Opc == BPF::Select_32_64 ||
                      Opc == BPF::Select_Ri_32 ||
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index 8a68f7056875bd..8370fdb3d538a9 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -71,6 +71,9 @@ class BPFTargetLowering : public TargetLowering {
 
   MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
 
+  bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
+  bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
+
 private:
   // Control Instruction Selection Features
   bool HasAlu32;
@@ -80,6 +83,7 @@ class BPFTargetLowering : public TargetLowering {
   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerATOMICRMW(SDValue Op, SelectionDAG &DAG) const;
 
   // Lower the result values of a call, copying them out of physregs into vregs
   SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 082e1f4a92c26b..1ed49d97e1b1a1 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -53,6 +53,7 @@ def BPFIsLittleEndian : Predicate<"CurDAG->getDataLayout().isLittleEndian()">;
 def BPFIsBigEndian : Predicate<"!CurDAG->getDataLayout().isLittleEndian()">;
 def BPFHasALU32 : Predicate<"Subtarget->getHasAlu32()">;
 def BPFNoALU32 : Predicate<"!Subtarget->getHasAlu32()">;
+def BPFSubtargetSolana : Predicate<"Subtarget->isSolana()">;
 
 def brtarget : Operand<OtherVT> {
   let PrintMethod = "printBrTargetOperand";
 }
@@ -745,6 +746,14 @@ def : Pat<(atomic_load_sub_32 ADDRri:$addr, GPR32:$val),
           (XFADDW32 ADDRri:$addr, (NEG_32 GPR32:$val))>;
 def : Pat<(atomic_load_sub_64 ADDRri:$addr, GPR:$val),
           (XFADDD ADDRri:$addr, (NEG_64 GPR:$val))>;
 
+let Predicates = [BPFSubtargetSolana], usesCustomInserter = 1, isCodeGenOnly = 1 in {
+  def ATOMIC_FENCE : Pseudo<
+    (outs),
+    (ins),
+    "#atomic_fence",
+    [(atomic_fence timm, timm)]>;
+}
+
 // Atomic Exchange
 class XCHG<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
     : TYPE_LD_ST<BPF_XCHG.Value, SizeOp.Value,
diff --git a/llvm/test/CodeGen/BPF/atomics_sbf.ll b/llvm/test/CodeGen/BPF/atomics_sbf.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/atomics_sbf.ll
+; CHECK-LABEL: test_max_32
+; CHECK: w0 = *(u32 *)(r1 + 0)
+; CHECK: w3 = w0
+; CHECK: if w0 s> w2 goto
+; CHECK: w3 = w2
+; CHECK: *(u32 *)(r1 + 0) = w3
+define dso_local i32 @test_max_32(i32* nocapture %ptr, i32 %v) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw max i32* %ptr, i32 %v release, align 1
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_max_64
+; CHECK: r0 = *(u64 *)(r1 + 0)
+; CHECK: r3 = r0
+; CHECK: if r0 s> r2 goto
+; CHECK: r3 = r2
+; CHECK: *(u64 *)(r1 + 0) = r3
+define dso_local i64 @test_max_64(i64* nocapture %ptr, i64 %v) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw max i64* %ptr, i64 %v release, align 1
+  ret i64 %0
+}
+
+; CHECK-LABEL: test_umin_32
+; CHECK: w0 = *(u32 *)(r1 + 0)
+; CHECK: w3 = w0
+; CHECK: if w0 < w2 goto
+; CHECK: w3 = w2
+; CHECK: *(u32 *)(r1 + 0) = w3
+define dso_local i32 @test_umin_32(i32* nocapture %ptr, i32 %v) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw umin i32* %ptr, i32 %v release, align 1
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_umin_64
+; CHECK: r0 = *(u64 *)(r1 + 0)
+; CHECK: r3 = r0
+; CHECK: if r0 < r2 goto
+; CHECK: r3 = r2
+; CHECK: *(u64 *)(r1 + 0) = r3
+define dso_local i64 @test_umin_64(i64* nocapture %ptr, i64 %v) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw umin i64* %ptr, i64 %v release, align 1
+  ret i64 %0
+}
+
+; CHECK-LABEL: test_umax_32
+; CHECK: w0 = *(u32 *)(r1 + 0)
+; CHECK: w3 = w0
+; CHECK: if w0 > w2 goto
+; CHECK: w3 = w2
+; CHECK: *(u32 *)(r1 + 0) = w3
+define dso_local i32 @test_umax_32(i32* nocapture %ptr, i32 %v) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw umax i32* %ptr, i32 %v release, align 1
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_umax_64
+; CHECK: r0 = *(u64 *)(r1 + 0)
+; CHECK: r3 = r0
+; CHECK: if r0 > r2 goto
+; CHECK: r3 = r2
+; CHECK: *(u64 *)(r1 + 0) = r3
+define dso_local i64 @test_umax_64(i64* nocapture %ptr, i64 %v) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw umax i64* %ptr, i64 %v release, align 1
+  ret i64 %0
+}
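
Reviewer note, not part of the patch: the net effect of LowerATOMICRMW is easiest to see as straight-line IR. The sketch below shows the non-atomic sequences the lowering is equivalent to for two representative cases: atomicrmw nand (the getNOT-of-AND path) and cmpxchg with a success flag (the select-based path). Function names are illustrative only, and atomic fences simply disappear, since the ATOMIC_FENCE pseudo is erased at instruction emission.

; Sketch only: the non-atomic IR equivalent of the SBF lowering above.

; atomicrmw nand -> load, and, not, store; returns the old value.
define i64 @nand64_sketch(i64* %ptr, i64 %v) {
entry:
  %old = load i64, i64* %ptr, align 8
  %and = and i64 %old, %v
  %new = xor i64 %and, -1          ; getNOT(AND(old, v))
  store i64 %new, i64* %ptr, align 8
  ret i64 %old
}

; cmpxchg with success flag -> load, compare, select, store;
; returns {old value, success}.
define { i64, i1 } @cmpxchg64_sketch(i64* %ptr, i64 %cmp, i64 %val) {
entry:
  %old = load i64, i64* %ptr, align 8
  %success = icmp eq i64 %old, %cmp
  %new = select i1 %success, i64 %val, i64 %old   ; SELECT_CC in the DAG
  store i64 %new, i64* %ptr, align 8
  %r0 = insertvalue { i64, i1 } undef, i64 %old, 0
  %r1 = insertvalue { i64, i1 } %r0, i1 %success, 1
  ret { i64, i1 } %r1
}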