From 553b2a97012757935293fdffa648967094a69982 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Mon, 29 Jul 2024 15:37:55 +0100 Subject: [PATCH 1/4] [RISCV] Add additional fence for amocas when required by recent ABI change A recent atomics ABI change / fix requiresthat for the "A6C" and A6S" atomics ABIs (i.e. both of those supported by LLVM currently), an additional fence is inserted for an atomic_compare_exchange with seq_cst failure ordering. This isn't trivial to support through the hooks used by AtomicExpandPass because that pass assumes that when fences are inserted, the original atomics ordering information can be removed from the instruction. Rather than try to change and complicate that API, this patch implements the needed fence insertion in RISCVCodeGenPrepare. --- llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 16 ++++++++++++++++ llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 ++++ .../RISCV/atomic-cmpxchg-branch-on-result.ll | 5 +++++ llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll | 14 ++++++++++++++ llvm/test/CodeGen/RISCV/atomic-rmw.ll | 14 ++++++++++++++ llvm/test/CodeGen/RISCV/atomic-signext.ll | 2 ++ 6 files changed, 55 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp index 0a66a38f6d5ab..bc6dd0a357c79 100644 --- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp +++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp @@ -10,6 +10,8 @@ // It munges the code in the input function to better prepare it for // SelectionDAG-based code generation. This works around limitations in it's // basic-block-at-a-time approach. +// It additionally implements a fence insertion for an atomic cmpxchg in a +// case that isn't easy to do with the current AtomicExpandPass hooks API. // //===----------------------------------------------------------------------===// @@ -59,6 +61,7 @@ class RISCVCodeGenPrepare : public FunctionPass, bool visitAnd(BinaryOperator &BO); bool visitIntrinsicInst(IntrinsicInst &I); bool expandVPStrideLoad(IntrinsicInst &I); + bool visitAtomicCmpXchgInst(AtomicCmpXchgInst &I); }; } // end anonymous namespace @@ -212,6 +215,19 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) { return true; } +// Insert a leading fence (needed for broadest atomics ABI compatibility) +// only if the Zacas extension is enabled and the AtomicCmpXchgInst has a +// SequentiallyConsistent failure ordering. +bool RISCVCodeGenPrepare::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { + IRBuilder<> Builder(&I); + if (!ST->hasStdExtZacas() || + I.getFailureOrdering() != AtomicOrdering::SequentiallyConsistent) + return false; + + Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); + return true; +} + bool RISCVCodeGenPrepare::runOnFunction(Function &F) { if (skipFunction(F)) return false; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 498c77f1875ed..517eea32719b2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -680,6 +680,10 @@ class RISCVTargetLowering : public TargetLowering { bool preferZeroCompareBranch() const override { return true; } + // Note that one specific case requires fence insertion for an + // AtomicCmpXchgInst but is handled via RISCVCodeGenPrepare rather than this + // hook due to limitations in the interface here (see RISCVCodeGenPrepare + // for more information). bool shouldInsertFencesForAtomic(const Instruction *I) const override { return isa(I) || isa(I); } diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll index b98d2d57a0b52..e70ba93de75e0 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll @@ -36,6 +36,7 @@ define void @cmpxchg_and_branch1(ptr %ptr, i32 signext %cmp, i32 signext %val) n ; ZACAS: # %bb.0: # %entry ; ZACAS-NEXT: .LBB0_1: # %do_cmpxchg ; ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; ZACAS-NEXT: fence rw, rw ; ZACAS-NEXT: mv a3, a1 ; ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0) ; ZACAS-NEXT: bne a3, a1, .LBB0_1 @@ -76,6 +77,7 @@ define void @cmpxchg_and_branch2(ptr %ptr, i32 signext %cmp, i32 signext %val) n ; ZACAS: # %bb.0: # %entry ; ZACAS-NEXT: .LBB1_1: # %do_cmpxchg ; ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; ZACAS-NEXT: fence rw, rw ; ZACAS-NEXT: mv a3, a1 ; ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0) ; ZACAS-NEXT: beq a3, a1, .LBB1_1 @@ -216,6 +218,7 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV64IA-ZABHA: # %bb.0: # %entry ; RV64IA-ZABHA-NEXT: .LBB2_1: # %do_cmpxchg ; RV64IA-ZABHA-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZABHA-NEXT: fence rw, rw ; RV64IA-ZABHA-NEXT: mv a3, a1 ; RV64IA-ZABHA-NEXT: amocas.b.aqrl a3, a2, (a0) ; RV64IA-ZABHA-NEXT: bne a3, a1, .LBB2_1 @@ -368,6 +371,7 @@ define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV64IA-ZABHA: # %bb.0: # %entry ; RV64IA-ZABHA-NEXT: .LBB3_1: # %do_cmpxchg ; RV64IA-ZABHA-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZABHA-NEXT: fence rw, rw ; RV64IA-ZABHA-NEXT: mv a3, a1 ; RV64IA-ZABHA-NEXT: amocas.b.aqrl a3, a2, (a0) ; RV64IA-ZABHA-NEXT: beq a3, a1, .LBB3_1 @@ -408,6 +412,7 @@ define void @cmpxchg_and_irrelevant_branch(ptr %ptr, i32 signext %cmp, i32 signe ; ZACAS: # %bb.0: # %entry ; ZACAS-NEXT: .LBB4_1: # %do_cmpxchg ; ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; ZACAS-NEXT: fence rw, rw ; ZACAS-NEXT: mv a4, a1 ; ZACAS-NEXT: amocas.w.aqrl a4, a2, (a0) ; ZACAS-NEXT: beqz a3, .LBB4_1 diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll index c47db319fc2c3..acd6e8f9afe2a 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll @@ -1857,6 +1857,7 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_seq_cst_seq_cst: ; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: fence rw, rw ; RV64IA-WMO-ZABHA-NEXT: amocas.b.aqrl a1, a2, (a0) ; RV64IA-WMO-ZABHA-NEXT: ret ; @@ -1885,6 +1886,7 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_seq_cst_seq_cst: ; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: fence rw, rw ; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0) ; RV64IA-TSO-ZABHA-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst seq_cst @@ -3787,6 +3789,7 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_seq_cst_seq_cst: ; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: fence rw, rw ; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0) ; RV64IA-WMO-ZABHA-NEXT: ret ; @@ -3816,6 +3819,7 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_seq_cst_seq_cst: ; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: fence rw, rw ; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0) ; RV64IA-TSO-ZABHA-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst seq_cst @@ -4788,6 +4792,7 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: ; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: fence rw, rw ; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) ; RV32IA-WMO-ZACAS-NEXT: ret ; @@ -4804,6 +4809,7 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: ; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: fence rw, rw ; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) ; RV32IA-TSO-ZACAS-NEXT: ret ; @@ -4834,11 +4840,13 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: ; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: fence rw, rw ; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_seq_cst_seq_cst: ; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: fence rw, rw ; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0) ; RV64IA-WMO-ZABHA-NEXT: ret ; @@ -4856,11 +4864,13 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: ; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: fence rw, rw ; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_seq_cst_seq_cst: ; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: fence rw, rw ; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0) ; RV64IA-TSO-ZABHA-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst @@ -5753,11 +5763,13 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst: ; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: fence rw, rw ; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_seq_cst_seq_cst: ; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: fence rw, rw ; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0) ; RV64IA-WMO-ZABHA-NEXT: ret ; @@ -5774,11 +5786,13 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst: ; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: fence rw, rw ; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0) ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_seq_cst_seq_cst: ; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: fence rw, rw ; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0) ; RV64IA-TSO-ZABHA-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst seq_cst diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 4223440b9cb88..03157e13bff78 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -4437,6 +4437,7 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3 +; RV64IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 56 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: amocas.b.aqrl a0, a3, (a2) ; RV64IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 56 @@ -4452,6 +4453,7 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3 +; RV64IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 56 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2) ; RV64IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 56 @@ -14410,6 +14412,7 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3 +; RV64IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 48 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: amocas.h.aqrl a0, a3, (a2) ; RV64IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 48 @@ -14425,6 +14428,7 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3 +; RV64IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 48 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2) ; RV64IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 48 @@ -21637,6 +21641,7 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32IA-WMO-ZACAS-NEXT: mv a3, a0 ; RV32IA-WMO-ZACAS-NEXT: and a4, a0, a1 ; RV32IA-WMO-ZACAS-NEXT: not a4, a4 +; RV32IA-WMO-ZACAS-NEXT: fence rw, rw ; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a0, a4, (a2) ; RV32IA-WMO-ZACAS-NEXT: bne a0, a3, .LBB154_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end @@ -21651,6 +21656,7 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32IA-TSO-ZACAS-NEXT: mv a3, a0 ; RV32IA-TSO-ZACAS-NEXT: and a4, a0, a1 ; RV32IA-TSO-ZACAS-NEXT: not a4, a4 +; RV32IA-TSO-ZACAS-NEXT: fence rw, rw ; RV32IA-TSO-ZACAS-NEXT: amocas.w a0, a4, (a2) ; RV32IA-TSO-ZACAS-NEXT: bne a0, a3, .LBB154_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end @@ -21665,6 +21671,7 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64IA-WMO-ZACAS-NEXT: mv a3, a0 ; RV64IA-WMO-ZACAS-NEXT: and a4, a0, a1 ; RV64IA-WMO-ZACAS-NEXT: not a4, a4 +; RV64IA-WMO-ZACAS-NEXT: fence rw, rw ; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a0, a4, (a2) ; RV64IA-WMO-ZACAS-NEXT: bne a0, a3, .LBB154_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end @@ -21679,6 +21686,7 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64IA-TSO-ZACAS-NEXT: mv a3, a0 ; RV64IA-TSO-ZACAS-NEXT: and a4, a0, a1 ; RV64IA-TSO-ZACAS-NEXT: not a4, a4 +; RV64IA-TSO-ZACAS-NEXT: fence rw, rw ; RV64IA-TSO-ZACAS-NEXT: amocas.w a0, a4, (a2) ; RV64IA-TSO-ZACAS-NEXT: bne a0, a3, .LBB154_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end @@ -21717,6 +21725,7 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a3, a0 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: and a4, a0, a1 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: not a4, a4 +; RV64IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-WMO-ZABHA-ZACAS-NEXT: amocas.w.aqrl a0, a4, (a2) ; RV64IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB154_1 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end @@ -21731,6 +21740,7 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64IA-TSO-ZABHA-ZACAS-NEXT: mv a3, a0 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: and a4, a0, a1 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: not a4, a4 +; RV64IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-TSO-ZABHA-ZACAS-NEXT: amocas.w a0, a4, (a2) ; RV64IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB154_1 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end @@ -25546,6 +25556,7 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64IA-WMO-ZACAS-NEXT: mv a3, a0 ; RV64IA-WMO-ZACAS-NEXT: and a4, a0, a1 ; RV64IA-WMO-ZACAS-NEXT: not a4, a4 +; RV64IA-WMO-ZACAS-NEXT: fence rw, rw ; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a0, a4, (a2) ; RV64IA-WMO-ZACAS-NEXT: bne a0, a3, .LBB209_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end @@ -25560,6 +25571,7 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64IA-TSO-ZACAS-NEXT: mv a3, a0 ; RV64IA-TSO-ZACAS-NEXT: and a4, a0, a1 ; RV64IA-TSO-ZACAS-NEXT: not a4, a4 +; RV64IA-TSO-ZACAS-NEXT: fence rw, rw ; RV64IA-TSO-ZACAS-NEXT: amocas.d a0, a4, (a2) ; RV64IA-TSO-ZACAS-NEXT: bne a0, a3, .LBB209_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end @@ -25598,6 +25610,7 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a3, a0 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: and a4, a0, a1 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: not a4, a4 +; RV64IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-WMO-ZABHA-ZACAS-NEXT: amocas.d.aqrl a0, a4, (a2) ; RV64IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB209_1 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end @@ -25612,6 +25625,7 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64IA-TSO-ZABHA-ZACAS-NEXT: mv a3, a0 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: and a4, a0, a1 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: not a4, a4 +; RV64IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-TSO-ZABHA-ZACAS-NEXT: amocas.d a0, a4, (a2) ; RV64IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB209_1 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index 775c17c3ceb3f..c143be478948e 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -5562,6 +5562,7 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3 ; RV32IA-ZACAS: # %bb.0: ; RV32IA-ZACAS-NEXT: beqz a3, .LBB64_2 ; RV32IA-ZACAS-NEXT: # %bb.1: # %then +; RV32IA-ZACAS-NEXT: fence rw, rw ; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) ; RV32IA-ZACAS-NEXT: mv a0, a1 ; RV32IA-ZACAS-NEXT: ret @@ -5612,6 +5613,7 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3 ; RV64IA-ZACAS: # %bb.0: ; RV64IA-ZACAS-NEXT: beqz a3, .LBB64_2 ; RV64IA-ZACAS-NEXT: # %bb.1: # %then +; RV64IA-ZACAS-NEXT: fence rw, rw ; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) ; RV64IA-ZACAS-NEXT: mv a0, a1 ; RV64IA-ZACAS-NEXT: ret From 012e5a5f0611b347fc1154d48d0f9e5f41fbdc55 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Wed, 11 Sep 2024 11:52:08 +0100 Subject: [PATCH 2/4] Move ABI motivated fix to separate pass as RISCVCodeGenPrepare isn't run at O0 --- llvm/lib/Target/RISCV/CMakeLists.txt | 1 + llvm/lib/Target/RISCV/RISCV.h | 3 + llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 16 ---- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 1 + llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp | 96 +++++++++++++++++++ llvm/test/CodeGen/RISCV/O0-pipeline.ll | 1 + llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 + 7 files changed, 103 insertions(+), 16 deletions(-) create mode 100644 llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index f28a7092e3cec..e9613a561a37c 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -55,6 +55,7 @@ add_llvm_target(RISCVCodeGen RISCVTargetObjectFile.cpp RISCVTargetTransformInfo.cpp RISCVVectorPeephole.cpp + RISCVZacasABIFix.cpp GISel/RISCVCallLowering.cpp GISel/RISCVInstructionSelector.cpp GISel/RISCVLegalizerInfo.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 0d2473c7c5de1..b6b7601e05ce0 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -79,6 +79,9 @@ void initializeRISCVMoveMergePass(PassRegistry &); FunctionPass *createRISCVPushPopOptimizationPass(); void initializeRISCVPushPopOptPass(PassRegistry &); +FunctionPass *createRISCVZacasABIFixPass(); +void initializeRISCVZacasABIFixPass(PassRegistry &); + InstructionSelector * createRISCVInstructionSelector(const RISCVTargetMachine &, const RISCVSubtarget &, diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp index bc6dd0a357c79..0a66a38f6d5ab 100644 --- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp +++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp @@ -10,8 +10,6 @@ // It munges the code in the input function to better prepare it for // SelectionDAG-based code generation. This works around limitations in it's // basic-block-at-a-time approach. -// It additionally implements a fence insertion for an atomic cmpxchg in a -// case that isn't easy to do with the current AtomicExpandPass hooks API. // //===----------------------------------------------------------------------===// @@ -61,7 +59,6 @@ class RISCVCodeGenPrepare : public FunctionPass, bool visitAnd(BinaryOperator &BO); bool visitIntrinsicInst(IntrinsicInst &I); bool expandVPStrideLoad(IntrinsicInst &I); - bool visitAtomicCmpXchgInst(AtomicCmpXchgInst &I); }; } // end anonymous namespace @@ -215,19 +212,6 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) { return true; } -// Insert a leading fence (needed for broadest atomics ABI compatibility) -// only if the Zacas extension is enabled and the AtomicCmpXchgInst has a -// SequentiallyConsistent failure ordering. -bool RISCVCodeGenPrepare::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { - IRBuilder<> Builder(&I); - if (!ST->hasStdExtZacas() || - I.getFailureOrdering() != AtomicOrdering::SequentiallyConsistent) - return false; - - Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); - return true; -} - bool RISCVCodeGenPrepare::runOnFunction(Function &F) { if (skipFunction(F)) return false; diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 21fbf47875e68..f4c6d14158bc5 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -418,6 +418,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteOptimized() { void RISCVPassConfig::addIRPasses() { addPass(createAtomicExpandLegacyPass()); + addPass(createRISCVZacasABIFixPass()); if (getOptLevel() != CodeGenOptLevel::None) { if (EnableLoopDataPrefetch) diff --git a/llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp b/llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp new file mode 100644 index 0000000000000..2b8616fb42ece --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp @@ -0,0 +1,96 @@ +//===----- RISCVZacasABIFix.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass implements a fence insertion for an atomic cmpxchg in a case that +// isn't easy to do with the current AtomicExpandPass hooks API. +// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsRISCV.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-zacas-abi-fix" +#define PASS_NAME "RISC-V Zacas ABI fix" + +namespace { + +class RISCVZacasABIFix : public FunctionPass, + public InstVisitor { + const RISCVSubtarget *ST; + +public: + static char ID; + + RISCVZacasABIFix() : FunctionPass(ID) {} + + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { return PASS_NAME; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + } + + bool visitInstruction(Instruction &I) { return false; } + bool visitAtomicCmpXchgInst(AtomicCmpXchgInst &I); +}; + +} // end anonymous namespace + +// Insert a leading fence (needed for broadest atomics ABI compatibility) +// only if the Zacas extension is enabled and the AtomicCmpXchgInst has a +// SequentiallyConsistent failure ordering. +bool RISCVZacasABIFix::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { + IRBuilder<> Builder(&I); + if (!ST->hasStdExtZacas() || + I.getFailureOrdering() != AtomicOrdering::SequentiallyConsistent) + return false; + + Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); + return true; +} + +bool RISCVZacasABIFix::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + auto &TPC = getAnalysis(); + auto &TM = TPC.getTM(); + ST = &TM.getSubtarget(F); + + bool MadeChange = false; + for (auto &BB : F) + for (Instruction &I : llvm::make_early_inc_range(BB)) + MadeChange |= visit(I); + + return MadeChange; +} + +INITIALIZE_PASS_BEGIN(RISCVZacasABIFix, DEBUG_TYPE, PASS_NAME, false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(RISCVZacasABIFix, DEBUG_TYPE, PASS_NAME, false, false) + +char RISCVZacasABIFix::ID = 0; + +FunctionPass *llvm::createRISCVZacasABIFixPass() { + return new RISCVZacasABIFix(); +} diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index 953eb873b660b..0256cc9df5692 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -22,6 +22,7 @@ ; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Expand large fp convert ; CHECK-NEXT: Expand Atomic instructions +; CHECK-NEXT: RISC-V Zacas ABI fix ; CHECK-NEXT: Module Verifier ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index e305a74d7aef3..86400fc95a92e 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -26,6 +26,7 @@ ; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Expand large fp convert ; CHECK-NEXT: Expand Atomic instructions +; CHECK-NEXT: RISC-V Zacas ABI fix ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops From e8a6043b339713b7a3561c85f04425645f36a5fd Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Wed, 18 Sep 2024 11:07:14 +0100 Subject: [PATCH 3/4] Address review comments --- llvm/lib/Target/RISCV/RISCVISelLowering.h | 5 ++--- llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 517eea32719b2..762a59f2c6970 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -681,9 +681,8 @@ class RISCVTargetLowering : public TargetLowering { bool preferZeroCompareBranch() const override { return true; } // Note that one specific case requires fence insertion for an - // AtomicCmpXchgInst but is handled via RISCVCodeGenPrepare rather than this - // hook due to limitations in the interface here (see RISCVCodeGenPrepare - // for more information). + // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather + // than this hook due to limitations in the interface here. bool shouldInsertFencesForAtomic(const Instruction *I) const override { return isa(I) || isa(I); } diff --git a/llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp b/llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp index 2b8616fb42ece..29054b84fcc7e 100644 --- a/llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp +++ b/llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp @@ -60,9 +60,9 @@ class RISCVZacasABIFix : public FunctionPass, // only if the Zacas extension is enabled and the AtomicCmpXchgInst has a // SequentiallyConsistent failure ordering. bool RISCVZacasABIFix::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { + assert(ST->hasStdExtZacas() && "only necessary to run in presence of zacas"); IRBuilder<> Builder(&I); - if (!ST->hasStdExtZacas() || - I.getFailureOrdering() != AtomicOrdering::SequentiallyConsistent) + if (I.getFailureOrdering() != AtomicOrdering::SequentiallyConsistent) return false; Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); @@ -70,7 +70,7 @@ bool RISCVZacasABIFix::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { } bool RISCVZacasABIFix::runOnFunction(Function &F) { - if (skipFunction(F)) + if (skipFunction(F) || !ST->hasStdExtZacas()) return false; auto &TPC = getAnalysis(); From 7c39913dbdc33d0a1175a4e556104d9d6038fdd8 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Wed, 18 Sep 2024 11:23:11 +0100 Subject: [PATCH 4/4] Fix silly issue where we checked ST before initialised --- llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp b/llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp index 29054b84fcc7e..17f88e71d0352 100644 --- a/llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp +++ b/llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp @@ -70,13 +70,13 @@ bool RISCVZacasABIFix::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { } bool RISCVZacasABIFix::runOnFunction(Function &F) { - if (skipFunction(F) || !ST->hasStdExtZacas()) - return false; - auto &TPC = getAnalysis(); auto &TM = TPC.getTM(); ST = &TM.getSubtarget(F); + if (skipFunction(F) || !ST->hasStdExtZacas()) + return false; + bool MadeChange = false; for (auto &BB : F) for (Instruction &I : llvm::make_early_inc_range(BB))