-
Notifications
You must be signed in to change notification settings - Fork 12.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AArch64: Add tests for atomicrmw fp operations #103701
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-aarch64 Author: Matt Arsenault (arsenm) Changes: There were only codegen tests for the fadd vector case, so round out the test coverage for the scalar cases and all the other operations. Patch is 111.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/103701.diff 5 Files Affected:
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd-fp-vector.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd-fp-vector.ll
deleted file mode 100644
index a7539ac3cce802..00000000000000
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd-fp-vector.ll
+++ /dev/null
@@ -1,115 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefixes=CHECK,NOLSE %s
-; RUN: llc -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefixes=CHECK,LSE %s
-
-define <2 x half> @test_atomicrmw_fadd_v2f16_align4(ptr addrspace(1) %ptr, <2 x half> %value) #0 {
-; NOLSE-LABEL: test_atomicrmw_fadd_v2f16_align4:
-; NOLSE: // %bb.0:
-; NOLSE-NEXT: fcvtl v1.4s, v0.4h
-; NOLSE-NEXT: ldr s0, [x0]
-; NOLSE-NEXT: b .LBB0_2
-; NOLSE-NEXT: .LBB0_1: // %atomicrmw.start
-; NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1
-; NOLSE-NEXT: fmov s0, w10
-; NOLSE-NEXT: cmp w10, w9
-; NOLSE-NEXT: b.eq .LBB0_5
-; NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
-; NOLSE-NEXT: // =>This Loop Header: Depth=1
-; NOLSE-NEXT: // Child Loop BB0_3 Depth 2
-; NOLSE-NEXT: fcvtl v2.4s, v0.4h
-; NOLSE-NEXT: fmov w9, s0
-; NOLSE-NEXT: fadd v2.4s, v2.4s, v1.4s
-; NOLSE-NEXT: fcvtn v2.4h, v2.4s
-; NOLSE-NEXT: fmov w8, s2
-; NOLSE-NEXT: .LBB0_3: // %atomicrmw.start
-; NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
-; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; NOLSE-NEXT: ldaxr w10, [x0]
-; NOLSE-NEXT: cmp w10, w9
-; NOLSE-NEXT: b.ne .LBB0_1
-; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
-; NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
-; NOLSE-NEXT: stlxr wzr, w8, [x0]
-; NOLSE-NEXT: cbnz wzr, .LBB0_3
-; NOLSE-NEXT: b .LBB0_1
-; NOLSE-NEXT: .LBB0_5: // %atomicrmw.end
-; NOLSE-NEXT: // kill: def $d0 killed $d0 killed $q0
-; NOLSE-NEXT: ret
-;
-; LSE-LABEL: test_atomicrmw_fadd_v2f16_align4:
-; LSE: // %bb.0:
-; LSE-NEXT: fcvtl v1.4s, v0.4h
-; LSE-NEXT: ldr s0, [x0]
-; LSE-NEXT: .LBB0_1: // %atomicrmw.start
-; LSE-NEXT: // =>This Inner Loop Header: Depth=1
-; LSE-NEXT: fcvtl v2.4s, v0.4h
-; LSE-NEXT: fmov w8, s0
-; LSE-NEXT: mov w10, w8
-; LSE-NEXT: fadd v2.4s, v2.4s, v1.4s
-; LSE-NEXT: fcvtn v2.4h, v2.4s
-; LSE-NEXT: fmov w9, s2
-; LSE-NEXT: casal w10, w9, [x0]
-; LSE-NEXT: fmov s0, w10
-; LSE-NEXT: cmp w10, w8
-; LSE-NEXT: b.ne .LBB0_1
-; LSE-NEXT: // %bb.2: // %atomicrmw.end
-; LSE-NEXT: // kill: def $d0 killed $d0 killed $q0
-; LSE-NEXT: ret
- %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value seq_cst, align 4
- ret <2 x half> %res
-}
-
-define <2 x float> @test_atomicrmw_fadd_v2f32_align8(ptr addrspace(1) %ptr, <2 x float> %value) #0 {
-; NOLSE-LABEL: test_atomicrmw_fadd_v2f32_align8:
-; NOLSE: // %bb.0:
-; NOLSE-NEXT: ldr d1, [x0]
-; NOLSE-NEXT: b .LBB1_2
-; NOLSE-NEXT: .LBB1_1: // %atomicrmw.start
-; NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1
-; NOLSE-NEXT: fmov d1, x10
-; NOLSE-NEXT: cmp x10, x9
-; NOLSE-NEXT: b.eq .LBB1_5
-; NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
-; NOLSE-NEXT: // =>This Loop Header: Depth=1
-; NOLSE-NEXT: // Child Loop BB1_3 Depth 2
-; NOLSE-NEXT: fadd v2.2s, v1.2s, v0.2s
-; NOLSE-NEXT: fmov x9, d1
-; NOLSE-NEXT: fmov x8, d2
-; NOLSE-NEXT: .LBB1_3: // %atomicrmw.start
-; NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
-; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; NOLSE-NEXT: ldaxr x10, [x0]
-; NOLSE-NEXT: cmp x10, x9
-; NOLSE-NEXT: b.ne .LBB1_1
-; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
-; NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
-; NOLSE-NEXT: stlxr wzr, x8, [x0]
-; NOLSE-NEXT: cbnz wzr, .LBB1_3
-; NOLSE-NEXT: b .LBB1_1
-; NOLSE-NEXT: .LBB1_5: // %atomicrmw.end
-; NOLSE-NEXT: fmov d0, d1
-; NOLSE-NEXT: ret
-;
-; LSE-LABEL: test_atomicrmw_fadd_v2f32_align8:
-; LSE: // %bb.0:
-; LSE-NEXT: ldr d1, [x0]
-; LSE-NEXT: .LBB1_1: // %atomicrmw.start
-; LSE-NEXT: // =>This Inner Loop Header: Depth=1
-; LSE-NEXT: fadd v2.2s, v1.2s, v0.2s
-; LSE-NEXT: fmov x8, d1
-; LSE-NEXT: mov x10, x8
-; LSE-NEXT: fmov x9, d2
-; LSE-NEXT: casal x10, x9, [x0]
-; LSE-NEXT: fmov d1, x10
-; LSE-NEXT: cmp x10, x8
-; LSE-NEXT: b.ne .LBB1_1
-; LSE-NEXT: // %bb.2: // %atomicrmw.end
-; LSE-NEXT: fmov d0, d1
-; LSE-NEXT: ret
- %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x float> %value seq_cst, align 8
- ret <2 x float> %res
-}
-
-attributes #0 = { nounwind }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
new file mode 100644
index 00000000000000..f95caf325b197c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
@@ -0,0 +1,706 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefix=NOLSE %s
+; RUN: llc -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefix=LSE %s
+
+define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2:
+; NOLSE: // %bb.0:
+; NOLSE-NEXT: fcvt s1, h0
+; NOLSE-NEXT: ldr h0, [x0]
+; NOLSE-NEXT: b .LBB0_2
+; NOLSE-NEXT: .LBB0_1: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1
+; NOLSE-NEXT: fmov s0, w10
+; NOLSE-NEXT: cmp w10, w9, uxth
+; NOLSE-NEXT: b.eq .LBB0_5
+; NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
+; NOLSE-NEXT: // =>This Loop Header: Depth=1
+; NOLSE-NEXT: // Child Loop BB0_3 Depth 2
+; NOLSE-NEXT: fcvt s2, h0
+; NOLSE-NEXT: fmov w9, s0
+; NOLSE-NEXT: fadd s2, s2, s1
+; NOLSE-NEXT: fcvt h2, s2
+; NOLSE-NEXT: fmov w8, s2
+; NOLSE-NEXT: .LBB0_3: // %atomicrmw.start
+; NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
+; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT: ldaxrh w10, [x0]
+; NOLSE-NEXT: cmp w10, w9, uxth
+; NOLSE-NEXT: b.ne .LBB0_1
+; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
+; NOLSE-NEXT: stlxrh wzr, w8, [x0]
+; NOLSE-NEXT: cbnz wzr, .LBB0_3
+; NOLSE-NEXT: b .LBB0_1
+; NOLSE-NEXT: .LBB0_5: // %atomicrmw.end
+; NOLSE-NEXT: // kill: def $h0 killed $h0 killed $s0
+; NOLSE-NEXT: ret
+;
+; LSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2:
+; LSE: // %bb.0:
+; LSE-NEXT: fcvt s1, h0
+; LSE-NEXT: ldr h0, [x0]
+; LSE-NEXT: .LBB0_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: fcvt s2, h0
+; LSE-NEXT: fmov w8, s0
+; LSE-NEXT: mov w10, w8
+; LSE-NEXT: fadd s2, s2, s1
+; LSE-NEXT: fcvt h2, s2
+; LSE-NEXT: fmov w9, s2
+; LSE-NEXT: casalh w10, w9, [x0]
+; LSE-NEXT: fmov s0, w10
+; LSE-NEXT: cmp w10, w8, uxth
+; LSE-NEXT: b.ne .LBB0_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: // kill: def $h0 killed $h0 killed $s0
+; LSE-NEXT: ret
+ %res = atomicrmw fadd ptr %ptr, half %value seq_cst, align 2
+ ret half %res
+}
+
+define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4:
+; NOLSE: // %bb.0:
+; NOLSE-NEXT: fcvt s1, h0
+; NOLSE-NEXT: ldr h0, [x0]
+; NOLSE-NEXT: b .LBB1_2
+; NOLSE-NEXT: .LBB1_1: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1
+; NOLSE-NEXT: fmov s0, w10
+; NOLSE-NEXT: cmp w10, w9, uxth
+; NOLSE-NEXT: b.eq .LBB1_5
+; NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
+; NOLSE-NEXT: // =>This Loop Header: Depth=1
+; NOLSE-NEXT: // Child Loop BB1_3 Depth 2
+; NOLSE-NEXT: fcvt s2, h0
+; NOLSE-NEXT: fmov w9, s0
+; NOLSE-NEXT: fadd s2, s2, s1
+; NOLSE-NEXT: fcvt h2, s2
+; NOLSE-NEXT: fmov w8, s2
+; NOLSE-NEXT: .LBB1_3: // %atomicrmw.start
+; NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
+; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT: ldaxrh w10, [x0]
+; NOLSE-NEXT: cmp w10, w9, uxth
+; NOLSE-NEXT: b.ne .LBB1_1
+; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
+; NOLSE-NEXT: stlxrh wzr, w8, [x0]
+; NOLSE-NEXT: cbnz wzr, .LBB1_3
+; NOLSE-NEXT: b .LBB1_1
+; NOLSE-NEXT: .LBB1_5: // %atomicrmw.end
+; NOLSE-NEXT: // kill: def $h0 killed $h0 killed $s0
+; NOLSE-NEXT: ret
+;
+; LSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4:
+; LSE: // %bb.0:
+; LSE-NEXT: fcvt s1, h0
+; LSE-NEXT: ldr h0, [x0]
+; LSE-NEXT: .LBB1_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: fcvt s2, h0
+; LSE-NEXT: fmov w8, s0
+; LSE-NEXT: mov w10, w8
+; LSE-NEXT: fadd s2, s2, s1
+; LSE-NEXT: fcvt h2, s2
+; LSE-NEXT: fmov w9, s2
+; LSE-NEXT: casalh w10, w9, [x0]
+; LSE-NEXT: fmov s0, w10
+; LSE-NEXT: cmp w10, w8, uxth
+; LSE-NEXT: b.ne .LBB1_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: // kill: def $h0 killed $h0 killed $s0
+; LSE-NEXT: ret
+ %res = atomicrmw fadd ptr %ptr, half %value seq_cst, align 4
+ ret half %res
+}
+
+define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
+; NOLSE: // %bb.0:
+; NOLSE-NEXT: // kill: def $h0 killed $h0 def $s0
+; NOLSE-NEXT: fmov w9, s0
+; NOLSE-NEXT: mov w8, #32767 // =0x7fff
+; NOLSE-NEXT: ldr h0, [x0]
+; NOLSE-NEXT: lsl w9, w9, #16
+; NOLSE-NEXT: fmov s1, w9
+; NOLSE-NEXT: b .LBB2_2
+; NOLSE-NEXT: .LBB2_1: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=1
+; NOLSE-NEXT: fmov s0, w11
+; NOLSE-NEXT: cmp w11, w9, uxth
+; NOLSE-NEXT: b.eq .LBB2_5
+; NOLSE-NEXT: .LBB2_2: // %atomicrmw.start
+; NOLSE-NEXT: // =>This Loop Header: Depth=1
+; NOLSE-NEXT: // Child Loop BB2_3 Depth 2
+; NOLSE-NEXT: fmov w9, s0
+; NOLSE-NEXT: lsl w9, w9, #16
+; NOLSE-NEXT: fmov s2, w9
+; NOLSE-NEXT: fadd s2, s2, s1
+; NOLSE-NEXT: fmov w9, s2
+; NOLSE-NEXT: ubfx w10, w9, #16, #1
+; NOLSE-NEXT: add w9, w9, w8
+; NOLSE-NEXT: add w9, w10, w9
+; NOLSE-NEXT: lsr w9, w9, #16
+; NOLSE-NEXT: fmov s2, w9
+; NOLSE-NEXT: fmov w9, s0
+; NOLSE-NEXT: fmov w10, s2
+; NOLSE-NEXT: .LBB2_3: // %atomicrmw.start
+; NOLSE-NEXT: // Parent Loop BB2_2 Depth=1
+; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT: ldaxrh w11, [x0]
+; NOLSE-NEXT: cmp w11, w9, uxth
+; NOLSE-NEXT: b.ne .LBB2_1
+; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB2_3 Depth=2
+; NOLSE-NEXT: stlxrh wzr, w10, [x0]
+; NOLSE-NEXT: cbnz wzr, .LBB2_3
+; NOLSE-NEXT: b .LBB2_1
+; NOLSE-NEXT: .LBB2_5: // %atomicrmw.end
+; NOLSE-NEXT: // kill: def $h0 killed $h0 killed $s0
+; NOLSE-NEXT: ret
+;
+; LSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
+; LSE: // %bb.0:
+; LSE-NEXT: // kill: def $h0 killed $h0 def $s0
+; LSE-NEXT: fmov w9, s0
+; LSE-NEXT: mov w8, #32767 // =0x7fff
+; LSE-NEXT: ldr h0, [x0]
+; LSE-NEXT: lsl w9, w9, #16
+; LSE-NEXT: fmov s1, w9
+; LSE-NEXT: .LBB2_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: fmov w9, s0
+; LSE-NEXT: lsl w9, w9, #16
+; LSE-NEXT: fmov s2, w9
+; LSE-NEXT: fadd s2, s2, s1
+; LSE-NEXT: fmov w9, s2
+; LSE-NEXT: ubfx w10, w9, #16, #1
+; LSE-NEXT: add w9, w9, w8
+; LSE-NEXT: add w9, w10, w9
+; LSE-NEXT: fmov w10, s0
+; LSE-NEXT: lsr w9, w9, #16
+; LSE-NEXT: mov w11, w10
+; LSE-NEXT: casalh w11, w9, [x0]
+; LSE-NEXT: fmov s0, w11
+; LSE-NEXT: cmp w11, w10, uxth
+; LSE-NEXT: b.ne .LBB2_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: // kill: def $h0 killed $h0 killed $s0
+; LSE-NEXT: ret
+ %res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 2
+ ret bfloat %res
+}
+
+define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
+; NOLSE: // %bb.0:
+; NOLSE-NEXT: // kill: def $h0 killed $h0 def $s0
+; NOLSE-NEXT: fmov w9, s0
+; NOLSE-NEXT: mov w8, #32767 // =0x7fff
+; NOLSE-NEXT: ldr h0, [x0]
+; NOLSE-NEXT: lsl w9, w9, #16
+; NOLSE-NEXT: fmov s1, w9
+; NOLSE-NEXT: b .LBB3_2
+; NOLSE-NEXT: .LBB3_1: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=1
+; NOLSE-NEXT: fmov s0, w11
+; NOLSE-NEXT: cmp w11, w9, uxth
+; NOLSE-NEXT: b.eq .LBB3_5
+; NOLSE-NEXT: .LBB3_2: // %atomicrmw.start
+; NOLSE-NEXT: // =>This Loop Header: Depth=1
+; NOLSE-NEXT: // Child Loop BB3_3 Depth 2
+; NOLSE-NEXT: fmov w9, s0
+; NOLSE-NEXT: lsl w9, w9, #16
+; NOLSE-NEXT: fmov s2, w9
+; NOLSE-NEXT: fadd s2, s2, s1
+; NOLSE-NEXT: fmov w9, s2
+; NOLSE-NEXT: ubfx w10, w9, #16, #1
+; NOLSE-NEXT: add w9, w9, w8
+; NOLSE-NEXT: add w9, w10, w9
+; NOLSE-NEXT: lsr w9, w9, #16
+; NOLSE-NEXT: fmov s2, w9
+; NOLSE-NEXT: fmov w9, s0
+; NOLSE-NEXT: fmov w10, s2
+; NOLSE-NEXT: .LBB3_3: // %atomicrmw.start
+; NOLSE-NEXT: // Parent Loop BB3_2 Depth=1
+; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT: ldaxrh w11, [x0]
+; NOLSE-NEXT: cmp w11, w9, uxth
+; NOLSE-NEXT: b.ne .LBB3_1
+; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB3_3 Depth=2
+; NOLSE-NEXT: stlxrh wzr, w10, [x0]
+; NOLSE-NEXT: cbnz wzr, .LBB3_3
+; NOLSE-NEXT: b .LBB3_1
+; NOLSE-NEXT: .LBB3_5: // %atomicrmw.end
+; NOLSE-NEXT: // kill: def $h0 killed $h0 killed $s0
+; NOLSE-NEXT: ret
+;
+; LSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
+; LSE: // %bb.0:
+; LSE-NEXT: // kill: def $h0 killed $h0 def $s0
+; LSE-NEXT: fmov w9, s0
+; LSE-NEXT: mov w8, #32767 // =0x7fff
+; LSE-NEXT: ldr h0, [x0]
+; LSE-NEXT: lsl w9, w9, #16
+; LSE-NEXT: fmov s1, w9
+; LSE-NEXT: .LBB3_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: fmov w9, s0
+; LSE-NEXT: lsl w9, w9, #16
+; LSE-NEXT: fmov s2, w9
+; LSE-NEXT: fadd s2, s2, s1
+; LSE-NEXT: fmov w9, s2
+; LSE-NEXT: ubfx w10, w9, #16, #1
+; LSE-NEXT: add w9, w9, w8
+; LSE-NEXT: add w9, w10, w9
+; LSE-NEXT: fmov w10, s0
+; LSE-NEXT: lsr w9, w9, #16
+; LSE-NEXT: mov w11, w10
+; LSE-NEXT: casalh w11, w9, [x0]
+; LSE-NEXT: fmov s0, w11
+; LSE-NEXT: cmp w11, w10, uxth
+; LSE-NEXT: b.ne .LBB3_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: // kill: def $h0 killed $h0 killed $s0
+; LSE-NEXT: ret
+ %res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 4
+ ret bfloat %res
+}
+
+define float @test_atomicrmw_fadd_f32_seq_cst_align4(ptr %ptr, float %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align4:
+; NOLSE: // %bb.0:
+; NOLSE-NEXT: ldr s1, [x0]
+; NOLSE-NEXT: b .LBB4_2
+; NOLSE-NEXT: .LBB4_1: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=1
+; NOLSE-NEXT: fmov s1, w10
+; NOLSE-NEXT: cmp w10, w9
+; NOLSE-NEXT: b.eq .LBB4_5
+; NOLSE-NEXT: .LBB4_2: // %atomicrmw.start
+; NOLSE-NEXT: // =>This Loop Header: Depth=1
+; NOLSE-NEXT: // Child Loop BB4_3 Depth 2
+; NOLSE-NEXT: fadd s2, s1, s0
+; NOLSE-NEXT: fmov w9, s1
+; NOLSE-NEXT: fmov w8, s2
+; NOLSE-NEXT: .LBB4_3: // %atomicrmw.start
+; NOLSE-NEXT: // Parent Loop BB4_2 Depth=1
+; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT: ldaxr w10, [x0]
+; NOLSE-NEXT: cmp w10, w9
+; NOLSE-NEXT: b.ne .LBB4_1
+; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB4_3 Depth=2
+; NOLSE-NEXT: stlxr wzr, w8, [x0]
+; NOLSE-NEXT: cbnz wzr, .LBB4_3
+; NOLSE-NEXT: b .LBB4_1
+; NOLSE-NEXT: .LBB4_5: // %atomicrmw.end
+; NOLSE-NEXT: fmov s0, s1
+; NOLSE-NEXT: ret
+;
+; LSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align4:
+; LSE: // %bb.0:
+; LSE-NEXT: ldr s1, [x0]
+; LSE-NEXT: .LBB4_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: fadd s2, s1, s0
+; LSE-NEXT: fmov w8, s1
+; LSE-NEXT: mov w10, w8
+; LSE-NEXT: fmov w9, s2
+; LSE-NEXT: casal w10, w9, [x0]
+; LSE-NEXT: fmov s1, w10
+; LSE-NEXT: cmp w10, w8
+; LSE-NEXT: b.ne .LBB4_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: fmov s0, s1
+; LSE-NEXT: ret
+ %res = atomicrmw fadd ptr %ptr, float %value seq_cst, align 4
+ ret float %res
+}
+
+define double @test_atomicrmw_fadd_f32_seq_cst_align8(ptr %ptr, double %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align8:
+; NOLSE: // %bb.0:
+; NOLSE-NEXT: ldr d1, [x0]
+; NOLSE-NEXT: b .LBB5_2
+; NOLSE-NEXT: .LBB5_1: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=1
+; NOLSE-NEXT: fmov d1, x10
+; NOLSE-NEXT: cmp x10, x9
+; NOLSE-NEXT: b.eq .LBB5_5
+; NOLSE-NEXT: .LBB5_2: // %atomicrmw.start
+; NOLSE-NEXT: // =>This Loop Header: Depth=1
+; NOLSE-NEXT: // Child Loop BB5_3 Depth 2
+; NOLSE-NEXT: fadd d2, d1, d0
+; NOLSE-NEXT: fmov x9, d1
+; NOLSE-NEXT: fmov x8, d2
+; NOLSE-NEXT: .LBB5_3: // %atomicrmw.start
+; NOLSE-NEXT: // Parent Loop BB5_2 Depth=1
+; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT: ldaxr x10, [x0]
+; NOLSE-NEXT: cmp x10, x9
+; NOLSE-NEXT: b.ne .LBB5_1
+; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB5_3 Depth=2
+; NOLSE-NEXT: stlxr wzr, x8, [x0]
+; NOLSE-NEXT: cbnz wzr, .LBB5_3
+; NOLSE-NEXT: b .LBB5_1
+; NOLSE-NEXT: .LBB5_5: // %atomicrmw.end
+; NOLSE-NEXT: fmov d0, d1
+; NOLSE-NEXT: ret
+;
+; LSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align8:
+; LSE: // %bb.0:
+; LSE-NEXT: ldr d1, [x0]
+; LSE-NEXT: .LBB5_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: fadd d2, d1, d0
+; LSE-NEXT: fmov x8, d1
+; LSE-NEXT: mov x10, x8
+; LSE-NEXT: fmov x9, d2
+; LSE-NEXT: casal x10, x9, [x0]
+; LSE-NEXT: fmov d1, x10
+; LSE-NEXT: cmp x10, x8
+; LSE-NEXT: b.ne .LBB5_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: fmov d0, d1
+; LSE-NEXT: ret
+ %res = atomicrmw fadd ptr %ptr, double %value seq_cst, align 8
+ ret double %res
+}
+
+define fp128 @test_atomicrmw_fadd_f32_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align16:
+; NOLSE: // %bb.0:
+; NOLSE-NEXT: sub sp, sp, #96
+; NOLSE-NEXT: ldr q1, [x0]
+; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; NOLSE-NEXT: mov x19, x0
+; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NOLSE-NEXT: b .LBB6_2
+; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
+; NOLSE-NEXT: stp x12, x13, [sp, #32]
+; NOLSE-NEXT: cmp x13, x10
+; NOLSE-NEXT: ldr q1, [sp, #32]
+; NOLSE-NEXT: ccmp x12, x11, #0, eq
+; NOLSE-NEXT: b.eq .LBB6_6
+; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
+; NOLSE-NEXT: // =>This Loop Header: Depth=1
+; NOLSE-NEXT: // Child Loop BB6_3 Depth 2
+; NOLSE-NEXT: mov v0.16b, v1.16b
+; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NOLSE-NEXT: bl __addtf3
+; NOLSE-NEXT: str q0, [sp, #48]
+; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; NOLSE-NEXT: ldp x9, x8, [sp, #48]
+; NOLSE-NEXT: str q0, [sp, #64]
+; NOLSE-NEXT: ldp x11, x10, [sp, #64]
+; NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
+; NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
+; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT: ldaxp x12, x13, [x19]
+; NOLSE-NEXT: cmp x12, x11
+; NOLSE-NEXT: cset w14, ne
+; NOLSE-NEXT: cmp x13, x10
+; NOLSE-NEXT: cinc w14, w14, ne
+; NOLSE-NEXT: cbz w14, .LBB6_5
+; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
+; NOLSE-NEXT: stlxp w14, x12, x13, [x19]
+; NOLSE-NEXT: cbnz w14, .LBB6_3
+; NOLSE-NEXT: b .LBB6_1
+; NOLSE-NEXT: .LBB6_5: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth...
[truncated]
|
We have a decent number of atomic tests in llvm/test/CodeGen/AArch64/Atomics, but I don't believe they include fp operations yet.
Grep doesn't find anything |
Yeah that was a TODO item I never got around to. |
38ce457
to
07be231
Compare
ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
07ad2a3
to
eb21c2b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have no idea why the Windows bot keeps failing on the fmin and fmax tests. The run lines are all using explicit, full triples.
They seem to be getting different offsets for temporary stack objects. Linux/Mac output has the stack store after the call at offset 48, and the Windows bot gets offset 64. `; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start` Windows bot: `ldr q1, [sp] // 16-byte Folded Reload` / `ldp x11, x10, [sp, #48]`
eb21c2b
to
9c924de
Compare
There were only codegen tests for the fadd vector case, so round out the test coverage for the scalar cases and all the other operations.
9c924de
to
03949db
Compare
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/4252. Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/4277. Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/4162. Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/2049. Here is the relevant piece of the build log for reference:
|
There were only codegen tests for the fadd vector case,
so round out the test coverage for the scalar cases
and all the other operations.