Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 3 additions & 9 deletions llvm/lib/CodeGen/RegisterCoalescer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ static cl::opt<bool> EnableJoining("join-liveintervals",
cl::desc("Coalesce copies (default=true)"),
cl::init(true), cl::Hidden);

static cl::opt<cl::boolOrDefault>
EnableTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"),
cl::init(cl::BOU_UNSET), cl::Hidden);
static cl::opt<bool> UseTerminalRule("terminal-rule",
cl::desc("Apply the terminal rule"),
cl::init(true), cl::Hidden);

/// Temporary flag to test critical edge unsplitting.
static cl::opt<bool> EnableJoinSplits(
Expand Down Expand Up @@ -134,7 +134,6 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
SlotIndexes *SI = nullptr;
const MachineLoopInfo *Loops = nullptr;
RegisterClassInfo RegClassInfo;
bool UseTerminalRule = false;

/// Position and VReg of a PHI instruction during coalescing.
struct PHIValPos {
Expand Down Expand Up @@ -4321,11 +4320,6 @@ bool RegisterCoalescer::run(MachineFunction &fn) {
else
JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);

if (EnableTerminalRule == cl::BOU_UNSET)
UseTerminalRule = STI.enableTerminalRule();
else
UseTerminalRule = EnableTerminalRule == cl::BOU_TRUE;

// If there are PHIs tracked by debug-info, they will need updating during
// coalescing. Build an index of those PHIs to ease updating.
SlotIndexes *Slots = LIS->getSlotIndexes();
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool enableMachineScheduler() const override { return true; }
bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
bool enableSubRegLiveness() const override { return EnableSubregLiveness; }
bool enableTerminalRule() const override { return true; }

bool enableMachinePipeliner() const override;
bool useDFAforSMS() const override { return false; }

Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -1040,8 +1040,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return true;
}

bool enableTerminalRule() const override { return true; }

bool useAA() const override;

bool enableSubRegLiveness() const override {
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/AMDGPU/R600Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,6 @@ class R600Subtarget final : public R600GenSubtargetInfo,
return true;
}

bool enableTerminalRule() const override { return true; }

bool enableSubRegLiveness() const override {
return true;
}
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/ARM/ARMSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
bool isRWPI() const;

bool useMachineScheduler() const { return UseMISched; }
bool enableTerminalRule() const override { return true; }
bool useMachinePipeliner() const { return UseMIPipeliner; }
bool hasMinSize() const { return OptMinSize; }
bool isThumb1Only() const { return isThumb() && !hasThumb2(); }
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/Hexagon/HexagonSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,6 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
bool useBSBScheduling() const { return UseBSBScheduling; }
bool enableMachineScheduler() const override;

bool enableTerminalRule() const override { return true; }

// Always use the TargetLowering default scheduler.
// FIXME: This will use the vliw scheduler which is probably just hurting
// compiler time and will be removed eventually anyway.
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/RISCV/RISCVSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
}

bool enableMachineScheduler() const override { return true; }
bool enableTerminalRule() const override { return true; }

bool enablePostRAScheduler() const override { return UsePostRAScheduler; }

Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/X86/X86Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -419,8 +419,6 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }

bool enableTerminalRule() const override { return true; }

bool enableEarlyIfConversion() const override;

void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/BPF/objdump_cond_op_2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ define i32 @test(i32, i32) local_unnamed_addr #0 {
%11 = sub nsw i32 %7, %9
%12 = icmp slt i32 %10, %11
br i1 %12, label %5, label %13
; CHECK: r1 = r3
; CHECK: if r2 s> r3 goto -10 <test+0x40>
; CHECK: if r2 s> r1 goto -10 <test+0x40>

; <label>:13: ; preds = %5, %2
%14 = phi i32 [ 0, %2 ], [ %9, %5 ]
Expand Down
150 changes: 75 additions & 75 deletions llvm/test/CodeGen/NVPTX/atomics-b128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -756,24 +756,24 @@ define i128 @test_atomicrmw_and(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB34_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: and.b64 %rd6, %rd11, %rd4;
; CHECK-NEXT: and.b64 %rd7, %rd12, %rd5;
; CHECK-NEXT: mov.b64 %rd2, %rd12;
; CHECK-NEXT: mov.b64 %rd1, %rd11;
; CHECK-NEXT: and.b64 %rd6, %rd1, %rd4;
; CHECK-NEXT: and.b64 %rd7, %rd2, %rd5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
; CHECK-NEXT: mov.b64 %rd11, %rd1;
; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p1 bra $L__BB34_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw and ptr %ptr, i128 %val monotonic
ret i128 %ret
Expand All @@ -791,24 +791,24 @@ define i128 @test_atomicrmw_or(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB35_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: or.b64 %rd6, %rd11, %rd4;
; CHECK-NEXT: or.b64 %rd7, %rd12, %rd5;
; CHECK-NEXT: mov.b64 %rd2, %rd12;
; CHECK-NEXT: mov.b64 %rd1, %rd11;
; CHECK-NEXT: or.b64 %rd6, %rd1, %rd4;
; CHECK-NEXT: or.b64 %rd7, %rd2, %rd5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
; CHECK-NEXT: mov.b64 %rd11, %rd1;
; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p1 bra $L__BB35_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw or ptr %ptr, i128 %val monotonic
ret i128 %ret
Expand All @@ -826,24 +826,24 @@ define i128 @test_atomicrmw_xor(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB36_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xor.b64 %rd6, %rd11, %rd4;
; CHECK-NEXT: xor.b64 %rd7, %rd12, %rd5;
; CHECK-NEXT: mov.b64 %rd2, %rd12;
; CHECK-NEXT: mov.b64 %rd1, %rd11;
; CHECK-NEXT: xor.b64 %rd6, %rd1, %rd4;
; CHECK-NEXT: xor.b64 %rd7, %rd2, %rd5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
; CHECK-NEXT: mov.b64 %rd11, %rd1;
; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p1 bra $L__BB36_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw xor ptr %ptr, i128 %val monotonic
ret i128 %ret
Expand All @@ -861,29 +861,29 @@ define i128 @test_atomicrmw_min(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB37_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4;
; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
; CHECK-NEXT: mov.b64 %rd2, %rd12;
; CHECK-NEXT: mov.b64 %rd1, %rd11;
; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd4;
; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
; CHECK-NEXT: setp.lt.s64 %p4, %rd12, %rd5;
; CHECK-NEXT: setp.lt.s64 %p4, %rd2, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
; CHECK-NEXT: mov.b64 %rd11, %rd1;
; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB37_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw min ptr %ptr, i128 %val monotonic
ret i128 %ret
Expand All @@ -901,29 +901,29 @@ define i128 @test_atomicrmw_max(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB38_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4;
; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
; CHECK-NEXT: mov.b64 %rd2, %rd12;
; CHECK-NEXT: mov.b64 %rd1, %rd11;
; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd4;
; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
; CHECK-NEXT: setp.gt.s64 %p4, %rd12, %rd5;
; CHECK-NEXT: setp.gt.s64 %p4, %rd2, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
; CHECK-NEXT: mov.b64 %rd11, %rd1;
; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB38_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw max ptr %ptr, i128 %val monotonic
ret i128 %ret
Expand All @@ -941,29 +941,29 @@ define i128 @test_atomicrmw_umin(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB39_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4;
; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
; CHECK-NEXT: mov.b64 %rd2, %rd12;
; CHECK-NEXT: mov.b64 %rd1, %rd11;
; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd4;
; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
; CHECK-NEXT: setp.lt.u64 %p4, %rd12, %rd5;
; CHECK-NEXT: setp.lt.u64 %p4, %rd2, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
; CHECK-NEXT: mov.b64 %rd11, %rd1;
; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB39_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw umin ptr %ptr, i128 %val monotonic
ret i128 %ret
Expand All @@ -981,29 +981,29 @@ define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB40_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4;
; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
; CHECK-NEXT: mov.b64 %rd2, %rd12;
; CHECK-NEXT: mov.b64 %rd1, %rd11;
; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd4;
; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
; CHECK-NEXT: setp.gt.u64 %p4, %rd12, %rd5;
; CHECK-NEXT: setp.gt.u64 %p4, %rd2, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
; CHECK-NEXT: mov.b64 %rd11, %rd1;
; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB40_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw umax ptr %ptr, i128 %val monotonic
ret i128 %ret
Expand Down
Loading
Loading