Skip to content

Commit 0a14b9c

Browse files
heiher authored and leecheechen committed
[LoongArch] Improve codegen for atomic ops (llvm#67391)
This PR improves the memory barriers generated by atomic operations.

Memory barrier semantics of LL/SC:

```
LL: <memory-barrier> + <load-exclusive>
SC: <store-conditional> + <memory-barrier>
```

Changes:

* Remove unnecessary memory barriers before LL and between LL/SC.
* Fix acquire semantics. (If the SC instruction is not executed, acquire semantics cannot be guaranteed. Therefore, an acquire barrier needs to be generated when the memory ordering includes an acquire operation.)

(cherry picked from commit 203ba23)

Change-Id: I4ef87f94e7e01ae9bd93e1e57338534131e93590
1 parent a18df7b commit 0a14b9c

File tree

8 files changed

+407
-148
lines changed

8 files changed

+407
-148
lines changed

llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp

Lines changed: 19 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -153,18 +153,12 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
153153
Register ScratchReg = MI.getOperand(1).getReg();
154154
Register AddrReg = MI.getOperand(2).getReg();
155155
Register IncrReg = MI.getOperand(3).getReg();
156-
AtomicOrdering Ordering =
157-
static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
158156

159157
// .loop:
160-
// if(Ordering != AtomicOrdering::Monotonic)
161-
// dbar 0
162158
// ll.[w|d] dest, (addr)
163159
// binop scratch, dest, val
164160
// sc.[w|d] scratch, scratch, (addr)
165161
// beqz scratch, loop
166-
if (Ordering != AtomicOrdering::Monotonic)
167-
BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
168162
BuildMI(LoopMBB, DL,
169163
TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
170164
.addReg(AddrReg)
@@ -251,21 +245,15 @@ static void doMaskedAtomicBinOpExpansion(
251245
Register AddrReg = MI.getOperand(2).getReg();
252246
Register IncrReg = MI.getOperand(3).getReg();
253247
Register MaskReg = MI.getOperand(4).getReg();
254-
AtomicOrdering Ordering =
255-
static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
256248

257249
// .loop:
258-
// if(Ordering != AtomicOrdering::Monotonic)
259-
// dbar 0
260250
// ll.w destreg, (alignedaddr)
261251
// binop scratch, destreg, incr
262252
// xor scratch, destreg, scratch
263253
// and scratch, scratch, masktargetdata
264254
// xor scratch, destreg, scratch
265255
// sc.w scratch, scratch, (alignedaddr)
266256
// beqz scratch, loop
267-
if (Ordering != AtomicOrdering::Monotonic)
268-
BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
269257
BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
270258
.addReg(AddrReg)
271259
.addImm(0);
@@ -372,23 +360,20 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
372360
auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
373361
auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
374362
auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
375-
auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
376363
auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
377364

378365
// Insert new MBBs.
379366
MF->insert(++MBB.getIterator(), LoopHeadMBB);
380367
MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
381368
MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
382-
MF->insert(++LoopTailMBB->getIterator(), TailMBB);
383-
MF->insert(++TailMBB->getIterator(), DoneMBB);
369+
MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
384370

385371
// Set up successors and transfer remaining instructions to DoneMBB.
386372
LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
387373
LoopHeadMBB->addSuccessor(LoopTailMBB);
388374
LoopIfBodyMBB->addSuccessor(LoopTailMBB);
389375
LoopTailMBB->addSuccessor(LoopHeadMBB);
390-
LoopTailMBB->addSuccessor(TailMBB);
391-
TailMBB->addSuccessor(DoneMBB);
376+
LoopTailMBB->addSuccessor(DoneMBB);
392377
DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
393378
DoneMBB->transferSuccessors(&MBB);
394379
MBB.addSuccessor(LoopHeadMBB);
@@ -402,11 +387,9 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
402387

403388
//
404389
// .loophead:
405-
// dbar 0
406390
// ll.w destreg, (alignedaddr)
407391
// and scratch2, destreg, mask
408392
// move scratch1, destreg
409-
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
410393
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
411394
.addReg(AddrReg)
412395
.addImm(0);
@@ -463,7 +446,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
463446
// .looptail:
464447
// sc.w scratch1, scratch1, (addr)
465448
// beqz scratch1, loop
466-
// dbar 0x700
467449
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
468450
.addReg(Scratch1Reg)
469451
.addReg(AddrReg)
@@ -472,18 +454,13 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
472454
.addReg(Scratch1Reg)
473455
.addMBB(LoopHeadMBB);
474456

475-
// .tail:
476-
// dbar 0x700
477-
BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
478-
479457
NextMBBI = MBB.end();
480458
MI.eraseFromParent();
481459

482460
LivePhysRegs LiveRegs;
483461
computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
484462
computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
485463
computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
486-
computeAndAddLiveIns(LiveRegs, *TailMBB);
487464
computeAndAddLiveIns(LiveRegs, *DoneMBB);
488465

489466
return true;
@@ -535,12 +512,10 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
535512
.addReg(CmpValReg)
536513
.addMBB(TailMBB);
537514
// .looptail:
538-
// dbar 0
539515
// move scratch, newval
540516
// sc.[w|d] scratch, scratch, (addr)
541517
// beqz scratch, loophead
542518
// b done
543-
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
544519
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
545520
.addReg(NewValReg)
546521
.addReg(LoongArch::R0);
@@ -573,13 +548,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
573548
.addMBB(TailMBB);
574549

575550
// .looptail:
576-
// dbar 0
577551
// andn scratch, dest, mask
578552
// or scratch, scratch, newval
579553
// sc.[w|d] scratch, scratch, (addr)
580554
// beqz scratch, loophead
581555
// b done
582-
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
583556
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg)
584557
.addReg(DestReg)
585558
.addReg(MaskReg);
@@ -598,9 +571,24 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
598571
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
599572
}
600573

574+
AtomicOrdering Ordering =
575+
static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
576+
int hint;
577+
578+
switch (Ordering) {
579+
case AtomicOrdering::Acquire:
580+
case AtomicOrdering::AcquireRelease:
581+
case AtomicOrdering::SequentiallyConsistent:
582+
// TODO: acquire
583+
hint = 0;
584+
break;
585+
default:
586+
hint = 0x700;
587+
}
588+
601589
// .tail:
602-
// dbar 0x700
603-
BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
590+
// dbar 0x700 | acquire
591+
BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint);
604592

605593
NextMBBI = MBB.end();
606594
MI.eraseFromParent();

llvm/lib/Target/LoongArch/LoongArchInstrInfo.td

Lines changed: 19 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1731,7 +1731,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;
17311731

17321732
class PseudoCmpXchg
17331733
: Pseudo<(outs GPR:$res, GPR:$scratch),
1734-
(ins GPR:$addr, GPR:$cmpval, GPR:$newval)> {
1734+
(ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> {
17351735
let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
17361736
let mayLoad = 1;
17371737
let mayStore = 1;
@@ -1821,14 +1821,28 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
18211821
def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
18221822
PseudoMaskedAtomicLoadUMin32>;
18231823

1824-
def : Pat<(atomic_cmp_swap_64 GPR:$addr, GPR:$cmp, GPR:$new),
1825-
(PseudoCmpXchg64 GPR:$addr, GPR:$cmp, GPR:$new)>;
1824+
// Ordering constants must be kept in sync with the AtomicOrdering enum in
1825+
// AtomicOrdering.h.
1826+
multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
1827+
ValueType vt = GRLenVT> {
1828+
def : Pat<(vt (!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
1829+
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
1830+
def : Pat<(vt (!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
1831+
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
1832+
def : Pat<(vt (!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)),
1833+
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
1834+
def : Pat<(vt (!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
1835+
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
1836+
def : Pat<(vt (!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
1837+
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
1838+
}
1839+
1840+
defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
1841+
defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>;
18261842
def : Pat<(int_loongarch_masked_cmpxchg_i64
18271843
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
18281844
(PseudoMaskedCmpXchg32
18291845
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
1830-
def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new),
1831-
(PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>;
18321846

18331847
def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i64,
18341848
PseudoMaskedAtomicLoadMax32>;

llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -34,14 +34,13 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
3434
; LA64-NEXT: bne $a5, $a3, .LBB0_5
3535
; LA64-NEXT: # %bb.4: # %atomicrmw.start
3636
; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2
37-
; LA64-NEXT: dbar 0
3837
; LA64-NEXT: move $a7, $a6
3938
; LA64-NEXT: sc.w $a7, $a2, 0
4039
; LA64-NEXT: beqz $a7, .LBB0_3
4140
; LA64-NEXT: b .LBB0_6
4241
; LA64-NEXT: .LBB0_5: # %atomicrmw.start
4342
; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
44-
; LA64-NEXT: dbar 1792
43+
; LA64-NEXT: dbar 0
4544
; LA64-NEXT: .LBB0_6: # %atomicrmw.start
4645
; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
4746
; LA64-NEXT: addi.w $a6, $a3, 0
@@ -88,14 +87,13 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
8887
; LA64-NEXT: bne $a5, $a3, .LBB1_5
8988
; LA64-NEXT: # %bb.4: # %atomicrmw.start
9089
; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2
91-
; LA64-NEXT: dbar 0
9290
; LA64-NEXT: move $a7, $a6
9391
; LA64-NEXT: sc.w $a7, $a2, 0
9492
; LA64-NEXT: beqz $a7, .LBB1_3
9593
; LA64-NEXT: b .LBB1_6
9694
; LA64-NEXT: .LBB1_5: # %atomicrmw.start
9795
; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
98-
; LA64-NEXT: dbar 1792
96+
; LA64-NEXT: dbar 0
9997
; LA64-NEXT: .LBB1_6: # %atomicrmw.start
10098
; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
10199
; LA64-NEXT: addi.w $a6, $a3, 0
@@ -129,14 +127,13 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
129127
; LA64-NEXT: bne $a1, $a3, .LBB2_5
130128
; LA64-NEXT: # %bb.4: # %atomicrmw.start
131129
; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2
132-
; LA64-NEXT: dbar 0
133130
; LA64-NEXT: move $a6, $a5
134131
; LA64-NEXT: sc.w $a6, $a0, 0
135132
; LA64-NEXT: beqz $a6, .LBB2_3
136133
; LA64-NEXT: b .LBB2_6
137134
; LA64-NEXT: .LBB2_5: # %atomicrmw.start
138135
; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
139-
; LA64-NEXT: dbar 1792
136+
; LA64-NEXT: dbar 0
140137
; LA64-NEXT: .LBB2_6: # %atomicrmw.start
141138
; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
142139
; LA64-NEXT: move $a3, $a1
@@ -168,14 +165,13 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
168165
; LA64-NEXT: bne $a2, $a3, .LBB3_5
169166
; LA64-NEXT: # %bb.4: # %atomicrmw.start
170167
; LA64-NEXT: # in Loop: Header=BB3_3 Depth=2
171-
; LA64-NEXT: dbar 0
172168
; LA64-NEXT: move $a5, $a4
173169
; LA64-NEXT: sc.d $a5, $a0, 0
174170
; LA64-NEXT: beqz $a5, .LBB3_3
175171
; LA64-NEXT: b .LBB3_6
176172
; LA64-NEXT: .LBB3_5: # %atomicrmw.start
177173
; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1
178-
; LA64-NEXT: dbar 1792
174+
; LA64-NEXT: dbar 0
179175
; LA64-NEXT: .LBB3_6: # %atomicrmw.start
180176
; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1
181177
; LA64-NEXT: bne $a2, $a3, .LBB3_1
@@ -224,14 +220,13 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
224220
; LA64-NEXT: bne $a6, $a3, .LBB4_5
225221
; LA64-NEXT: # %bb.4: # %atomicrmw.start
226222
; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2
227-
; LA64-NEXT: dbar 0
228223
; LA64-NEXT: move $t0, $a7
229224
; LA64-NEXT: sc.w $t0, $a2, 0
230225
; LA64-NEXT: beqz $t0, .LBB4_3
231226
; LA64-NEXT: b .LBB4_6
232227
; LA64-NEXT: .LBB4_5: # %atomicrmw.start
233228
; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
234-
; LA64-NEXT: dbar 1792
229+
; LA64-NEXT: dbar 0
235230
; LA64-NEXT: .LBB4_6: # %atomicrmw.start
236231
; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
237232
; LA64-NEXT: addi.w $a7, $a3, 0
@@ -283,14 +278,13 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
283278
; LA64-NEXT: bne $a6, $a3, .LBB5_5
284279
; LA64-NEXT: # %bb.4: # %atomicrmw.start
285280
; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2
286-
; LA64-NEXT: dbar 0
287281
; LA64-NEXT: move $t0, $a7
288282
; LA64-NEXT: sc.w $t0, $a2, 0
289283
; LA64-NEXT: beqz $t0, .LBB5_3
290284
; LA64-NEXT: b .LBB5_6
291285
; LA64-NEXT: .LBB5_5: # %atomicrmw.start
292286
; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
293-
; LA64-NEXT: dbar 1792
287+
; LA64-NEXT: dbar 0
294288
; LA64-NEXT: .LBB5_6: # %atomicrmw.start
295289
; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
296290
; LA64-NEXT: addi.w $a7, $a3, 0
@@ -329,14 +323,13 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
329323
; LA64-NEXT: bne $a2, $a4, .LBB6_5
330324
; LA64-NEXT: # %bb.4: # %atomicrmw.start
331325
; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2
332-
; LA64-NEXT: dbar 0
333326
; LA64-NEXT: move $a7, $a6
334327
; LA64-NEXT: sc.w $a7, $a0, 0
335328
; LA64-NEXT: beqz $a7, .LBB6_3
336329
; LA64-NEXT: b .LBB6_6
337330
; LA64-NEXT: .LBB6_5: # %atomicrmw.start
338331
; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
339-
; LA64-NEXT: dbar 1792
332+
; LA64-NEXT: dbar 0
340333
; LA64-NEXT: .LBB6_6: # %atomicrmw.start
341334
; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
342335
; LA64-NEXT: move $a4, $a2
@@ -373,14 +366,13 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
373366
; LA64-NEXT: bne $a2, $a3, .LBB7_5
374367
; LA64-NEXT: # %bb.4: # %atomicrmw.start
375368
; LA64-NEXT: # in Loop: Header=BB7_3 Depth=2
376-
; LA64-NEXT: dbar 0
377369
; LA64-NEXT: move $a5, $a4
378370
; LA64-NEXT: sc.d $a5, $a0, 0
379371
; LA64-NEXT: beqz $a5, .LBB7_3
380372
; LA64-NEXT: b .LBB7_6
381373
; LA64-NEXT: .LBB7_5: # %atomicrmw.start
382374
; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1
383-
; LA64-NEXT: dbar 1792
375+
; LA64-NEXT: dbar 0
384376
; LA64-NEXT: .LBB7_6: # %atomicrmw.start
385377
; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1
386378
; LA64-NEXT: bne $a2, $a3, .LBB7_1

0 commit comments

Comments
 (0)