Skip to content
This repository was archived by the owner on Mar 6, 2025. It is now read-only.

Commit 6ac8ab6

Browse files
xry111 authored and ouuleilei-bot committed
LoongArch: Remove redundant barrier instructions before LL-SC loops
This is isomorphic to the LLVM changes [1-2]. On LoongArch, the LL and SC instructions have memory barrier semantics: - LL: <memory-barrier> + <load-exclusive> - SC: <store-conditional> + <memory-barrier> But the compare and swap operation is allowed to fail, and if it fails the SC instruction is not executed, thus the guarantee of acquiring semantics cannot be ensured. Therefore, an acquire barrier needs to be generated when failure_memorder includes an acquire operation. On CPUs implementing LoongArch v1.10 or later, "dbar 0b10100" is an acquire barrier; on CPUs implementing LoongArch v1.00, it is a full barrier. So it's always enough for acquire semantics. OTOH if an acquire semantic is not needed, we still need the "dbar 0x700" as the load-load barrier like all LL-SC loops. [1]:llvm/llvm-project#67391 [2]:llvm/llvm-project#69339 gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_memmodel_needs_release_fence): Remove. (loongarch_cas_failure_memorder_needs_acquire): New static function. (loongarch_print_operand): Redefine 'G' for the barrier on CAS failure. * config/loongarch/sync.md (atomic_cas_value_strong<mode>): Remove the redundant barrier before the LL instruction, and emit an acquire barrier on failure if needed by failure_memorder. (atomic_cas_value_cmp_and_7_<mode>): Likewise. (atomic_cas_value_add_7_<mode>): Remove the unnecessary barrier before the LL instruction. (atomic_cas_value_sub_7_<mode>): Likewise. (atomic_cas_value_and_7_<mode>): Likewise. (atomic_cas_value_xor_7_<mode>): Likewise. (atomic_cas_value_or_7_<mode>): Likewise. (atomic_cas_value_nand_7_<mode>): Likewise. (atomic_cas_value_exchange_7_<mode>): Likewise. gcc/testsuite/ChangeLog: * gcc.target/loongarch/cas-acquire.c: New test.
1 parent c2d62cd commit 6ac8ab6

File tree

3 files changed

+119
-42
lines changed

3 files changed

+119
-42
lines changed

gcc/config/loongarch/loongarch.cc

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4876,27 +4876,27 @@ loongarch_memmodel_needs_rel_acq_fence (enum memmodel model)
48764876
}
48774877
}
48784878

4879-
/* Return true if a FENCE should be emitted to before a memory access to
4880-
implement the release portion of memory model MODEL. */
4879+
/* Return true if a FENCE should be emitted after a failed CAS to
4880+
implement the acquire semantic of failure_memorder. */
48814881

48824882
static bool
4883-
loongarch_memmodel_needs_release_fence (enum memmodel model)
4883+
loongarch_cas_failure_memorder_needs_acquire (enum memmodel model)
48844884
{
4885-
switch (model)
4885+
switch (memmodel_base (model))
48864886
{
4887+
case MEMMODEL_ACQUIRE:
48874888
case MEMMODEL_ACQ_REL:
48884889
case MEMMODEL_SEQ_CST:
4889-
case MEMMODEL_SYNC_SEQ_CST:
4890-
case MEMMODEL_RELEASE:
4891-
case MEMMODEL_SYNC_RELEASE:
48924890
return true;
48934891

4894-
case MEMMODEL_ACQUIRE:
4895-
case MEMMODEL_CONSUME:
4896-
case MEMMODEL_SYNC_ACQUIRE:
48974892
case MEMMODEL_RELAXED:
4893+
case MEMMODEL_RELEASE:
48984894
return false;
48994895

4896+
/* MEMMODEL_CONSUME is deliberately not handled because it's always
4897+
replaced by MEMMODEL_ACQUIRE as at now. If you see an ICE caused by
4898+
MEMMODEL_CONSUME, read the change (re)introducing it carefully and
4899+
decide what to do. See PR 59448 and get_memmodel in builtins.cc. */
49004900
default:
49014901
gcc_unreachable ();
49024902
}
@@ -5006,7 +5006,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
50065006
'C' Print the integer branch condition for comparison OP.
50075007
'd' Print CONST_INT OP in decimal.
50085008
'F' Print the FPU branch condition for comparison OP.
5009-
'G' Print a DBAR insn if the memory model requires a release.
5009+
'G' Print a DBAR insn for CAS failure (with an acquire semantic if
5010+
needed, otherwise a simple load-load barrier).
50105011
'H' Print address 52-61bit relocation associated with OP.
50115012
'h' Print the high-part relocation associated with OP.
50125013
'i' Print i if the operand is not a register.
@@ -5073,8 +5074,11 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
50735074
break;
50745075

50755076
case 'G':
5076-
if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
5077-
fputs ("dbar\t0", file);
5077+
if (loongarch_cas_failure_memorder_needs_acquire (
5078+
memmodel_from_int (INTVAL (op))))
5079+
fputs ("dbar\t0b10100", file);
5080+
else
5081+
fputs ("dbar\t0x700", file);
50785082
break;
50795083

50805084
case 'h':

gcc/config/loongarch/sync.md

Lines changed: 20 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -129,19 +129,18 @@
129129
(clobber (match_scratch:GPR 6 "=&r"))]
130130
""
131131
{
132-
return "%G5\\n\\t"
133-
"1:\\n\\t"
132+
return "1:\\n\\t"
134133
"ll.<amo>\\t%0,%1\\n\\t"
135134
"bne\\t%0,%z2,2f\\n\\t"
136135
"or%i3\\t%6,$zero,%3\\n\\t"
137136
"sc.<amo>\\t%6,%1\\n\\t"
138-
"beq\\t$zero,%6,1b\\n\\t"
137+
"beqz\\t%6,1b\\n\\t"
139138
"b\\t3f\\n\\t"
140139
"2:\\n\\t"
141-
"dbar\\t0x700\\n\\t"
140+
"%G5\\n\\t"
142141
"3:\\n\\t";
143142
}
144-
[(set (attr "length") (const_int 32))])
143+
[(set (attr "length") (const_int 28))])
145144

146145
(define_expand "atomic_compare_and_swap<mode>"
147146
[(match_operand:SI 0 "register_operand" "") ;; bool output
@@ -234,8 +233,7 @@
234233
(clobber (match_scratch:GPR 7 "=&r"))]
235234
""
236235
{
237-
return "%G6\\n\\t"
238-
"1:\\n\\t"
236+
return "1:\\n\\t"
239237
"ll.<amo>\\t%0,%1\\n\\t"
240238
"and\\t%7,%0,%2\\n\\t"
241239
"bne\\t%7,%z4,2f\\n\\t"
@@ -245,10 +243,10 @@
245243
"beq\\t$zero,%7,1b\\n\\t"
246244
"b\\t3f\\n\\t"
247245
"2:\\n\\t"
248-
"dbar\\t0x700\\n\\t"
246+
"%G6\\n\\t"
249247
"3:\\n\\t";
250248
}
251-
[(set (attr "length") (const_int 40))])
249+
[(set (attr "length") (const_int 36))])
252250

253251
(define_expand "atomic_compare_and_swap<mode>"
254252
[(match_operand:SI 0 "register_operand" "") ;; bool output
@@ -303,8 +301,7 @@
303301
(clobber (match_scratch:GPR 8 "=&r"))]
304302
""
305303
{
306-
return "%G6\\n\\t"
307-
"1:\\n\\t"
304+
return "1:\\n\\t"
308305
"ll.<amo>\\t%0,%1\\n\\t"
309306
"and\\t%7,%0,%3\\n\\t"
310307
"add.w\\t%8,%0,%z5\\n\\t"
@@ -314,7 +311,7 @@
314311
"beq\\t$zero,%7,1b";
315312
}
316313

317-
[(set (attr "length") (const_int 32))])
314+
[(set (attr "length") (const_int 28))])
318315

319316
(define_insn "atomic_cas_value_sub_7_<mode>"
320317
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
@@ -330,8 +327,7 @@
330327
(clobber (match_scratch:GPR 8 "=&r"))]
331328
""
332329
{
333-
return "%G6\\n\\t"
334-
"1:\\n\\t"
330+
return "1:\\n\\t"
335331
"ll.<amo>\\t%0,%1\\n\\t"
336332
"and\\t%7,%0,%3\\n\\t"
337333
"sub.w\\t%8,%0,%z5\\n\\t"
@@ -340,7 +336,7 @@
340336
"sc.<amo>\\t%7,%1\\n\\t"
341337
"beq\\t$zero,%7,1b";
342338
}
343-
[(set (attr "length") (const_int 32))])
339+
[(set (attr "length") (const_int 28))])
344340

345341
(define_insn "atomic_cas_value_and_7_<mode>"
346342
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
@@ -356,8 +352,7 @@
356352
(clobber (match_scratch:GPR 8 "=&r"))]
357353
""
358354
{
359-
return "%G6\\n\\t"
360-
"1:\\n\\t"
355+
return "1:\\n\\t"
361356
"ll.<amo>\\t%0,%1\\n\\t"
362357
"and\\t%7,%0,%3\\n\\t"
363358
"and\\t%8,%0,%z5\\n\\t"
@@ -366,7 +361,7 @@
366361
"sc.<amo>\\t%7,%1\\n\\t"
367362
"beq\\t$zero,%7,1b";
368363
}
369-
[(set (attr "length") (const_int 32))])
364+
[(set (attr "length") (const_int 28))])
370365

371366
(define_insn "atomic_cas_value_xor_7_<mode>"
372367
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
@@ -382,8 +377,7 @@
382377
(clobber (match_scratch:GPR 8 "=&r"))]
383378
""
384379
{
385-
return "%G6\\n\\t"
386-
"1:\\n\\t"
380+
return "1:\\n\\t"
387381
"ll.<amo>\\t%0,%1\\n\\t"
388382
"and\\t%7,%0,%3\\n\\t"
389383
"xor\\t%8,%0,%z5\\n\\t"
@@ -393,7 +387,7 @@
393387
"beq\\t$zero,%7,1b";
394388
}
395389

396-
[(set (attr "length") (const_int 32))])
390+
[(set (attr "length") (const_int 28))])
397391

398392
(define_insn "atomic_cas_value_or_7_<mode>"
399393
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
@@ -409,8 +403,7 @@
409403
(clobber (match_scratch:GPR 8 "=&r"))]
410404
""
411405
{
412-
return "%G6\\n\\t"
413-
"1:\\n\\t"
406+
return "1:\\n\\t"
414407
"ll.<amo>\\t%0,%1\\n\\t"
415408
"and\\t%7,%0,%3\\n\\t"
416409
"or\\t%8,%0,%z5\\n\\t"
@@ -420,7 +413,7 @@
420413
"beq\\t$zero,%7,1b";
421414
}
422415

423-
[(set (attr "length") (const_int 32))])
416+
[(set (attr "length") (const_int 28))])
424417

425418
(define_insn "atomic_cas_value_nand_7_<mode>"
426419
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
@@ -436,8 +429,7 @@
436429
(clobber (match_scratch:GPR 8 "=&r"))]
437430
""
438431
{
439-
return "%G6\\n\\t"
440-
"1:\\n\\t"
432+
return "1:\\n\\t"
441433
"ll.<amo>\\t%0,%1\\n\\t"
442434
"and\\t%7,%0,%3\\n\\t"
443435
"and\\t%8,%0,%z5\\n\\t"
@@ -446,7 +438,7 @@
446438
"sc.<amo>\\t%7,%1\\n\\t"
447439
"beq\\t$zero,%7,1b";
448440
}
449-
[(set (attr "length") (const_int 32))])
441+
[(set (attr "length") (const_int 28))])
450442

451443
(define_insn "atomic_cas_value_exchange_7_<mode>"
452444
[(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -461,8 +453,7 @@
461453
(clobber (match_scratch:GPR 7 "=&r"))]
462454
""
463455
{
464-
return "%G6\\n\\t"
465-
"1:\\n\\t"
456+
return "1:\\n\\t"
466457
"ll.<amo>\\t%0,%1\\n\\t"
467458
"and\\t%7,%0,%z3\\n\\t"
468459
"or%i5\\t%7,%7,%5\\n\\t"
gcc/testsuite/gcc.target/loongarch/cas-acquire.c

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/* { dg-do run } */
2+
/* { dg-require-effective-target c99_runtime } */
3+
/* { dg-require-effective-target pthread } */
4+
/* { dg-options "-std=c99 -pthread" } */
5+
6+
/* https://github.com/llvm/llvm-project/pull/67391#issuecomment-1752403934
7+
reported that this had failed with GCC and 3A6000. */
8+
9+
#include <pthread.h>
10+
#include <stdatomic.h>
11+
#include <stdbool.h>
12+
#include <stdio.h>
13+
14+
static unsigned int tags[32];
15+
static unsigned int vals[32];
16+
17+
static void *
18+
writer_entry (void *data)
19+
{
20+
atomic_uint *pt = (atomic_uint *)tags;
21+
atomic_uint *pv = (atomic_uint *)vals;
22+
23+
for (unsigned int n = 1; n < 10000; n++)
24+
{
25+
atomic_store_explicit (&pv[n & 31], n, memory_order_release);
26+
atomic_store_explicit (&pt[n & 31], n, memory_order_release);
27+
}
28+
29+
return NULL;
30+
}
31+
32+
static void *
33+
reader_entry (void *data)
34+
{
35+
atomic_uint *pt = (atomic_uint *)tags;
36+
atomic_uint *pv = (atomic_uint *)vals;
37+
int i;
38+
39+
for (;;)
40+
{
41+
for (i = 0; i < 32; i++)
42+
{
43+
unsigned int tag = 0;
44+
bool res;
45+
46+
res = atomic_compare_exchange_weak_explicit (
47+
&pt[i], &tag, 0, memory_order_acquire, memory_order_acquire);
48+
if (!res)
49+
{
50+
unsigned int val;
51+
52+
val = atomic_load_explicit (&pv[i], memory_order_relaxed);
53+
if (val < tag)
54+
__builtin_trap ();
55+
}
56+
}
57+
}
58+
59+
return NULL;
60+
}
61+
62+
int
63+
main (int argc, char *argv[])
64+
{
65+
pthread_t writer;
66+
pthread_t reader;
67+
int res;
68+
69+
res = pthread_create (&writer, NULL, writer_entry, NULL);
70+
if (res < 0)
71+
__builtin_trap ();
72+
73+
res = pthread_create (&reader, NULL, reader_entry, NULL);
74+
if (res < 0)
75+
__builtin_trap ();
76+
77+
res = pthread_join (writer, NULL);
78+
if (res < 0)
79+
__builtin_trap ();
80+
81+
return 0;
82+
}

0 commit comments

Comments
 (0)