AArch64: Add tests for atomicrmw fp operations #103701

arsenm · 2024-08-14T06:45:08Z

There were only codegen tests for the fadd vector case,
so round out the test coverage for the scalar cases
and all the other operations.

arsenm · 2024-08-14T06:45:23Z

AArch64: Use consistent atomicrmw expansion for FP operations #103702
AArch64: Add tests for atomicrmw fp operations #103701 👈
main

This stack of pull requests is managed by Graphite. Learn more about stacking.

Join @arsenm and the rest of your teammates on Graphite

llvmbot · 2024-08-14T06:45:57Z

@llvm/pr-subscribers-backend-aarch64

Author: Matt Arsenault (arsenm)

Changes

There were only codegen tests for the fadd vector case,
so round out the test coverage for the scalar cases
and all the other operations.

Patch is 111.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/103701.diff

5 Files Affected:

(removed) llvm/test/CodeGen/AArch64/atomicrmw-fadd-fp-vector.ll (-115)
(added) llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll (+706)
(added) llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll (+766)
(added) llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll (+766)
(added) llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll (+706)

diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd-fp-vector.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd-fp-vector.ll
deleted file mode 100644
index a7539ac3cce802..00000000000000
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd-fp-vector.ll
+++ /dev/null
@@ -1,115 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefixes=CHECK,NOLSE %s
-; RUN: llc -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefixes=CHECK,LSE %s
-
-define <2 x half> @test_atomicrmw_fadd_v2f16_align4(ptr addrspace(1) %ptr, <2 x half> %value) #0 {
-; NOLSE-LABEL: test_atomicrmw_fadd_v2f16_align4:
-; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    fcvtl v1.4s, v0.4h
-; NOLSE-NEXT:    ldr s0, [x0]
-; NOLSE-NEXT:    b .LBB0_2
-; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
-; NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=1
-; NOLSE-NEXT:    fmov s0, w10
-; NOLSE-NEXT:    cmp w10, w9
-; NOLSE-NEXT:    b.eq .LBB0_5
-; NOLSE-NEXT:  .LBB0_2: // %atomicrmw.start
-; NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; NOLSE-NEXT:    // Child Loop BB0_3 Depth 2
-; NOLSE-NEXT:    fcvtl v2.4s, v0.4h
-; NOLSE-NEXT:    fmov w9, s0
-; NOLSE-NEXT:    fadd v2.4s, v2.4s, v1.4s
-; NOLSE-NEXT:    fcvtn v2.4h, v2.4s
-; NOLSE-NEXT:    fmov w8, s2
-; NOLSE-NEXT:  .LBB0_3: // %atomicrmw.start
-; NOLSE-NEXT:    // Parent Loop BB0_2 Depth=1
-; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; NOLSE-NEXT:    ldaxr w10, [x0]
-; NOLSE-NEXT:    cmp w10, w9
-; NOLSE-NEXT:    b.ne .LBB0_1
-; NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
-; NOLSE-NEXT:    // in Loop: Header=BB0_3 Depth=2
-; NOLSE-NEXT:    stlxr wzr, w8, [x0]
-; NOLSE-NEXT:    cbnz wzr, .LBB0_3
-; NOLSE-NEXT:    b .LBB0_1
-; NOLSE-NEXT:  .LBB0_5: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; NOLSE-NEXT:    ret
-;
-; LSE-LABEL: test_atomicrmw_fadd_v2f16_align4:
-; LSE:       // %bb.0:
-; LSE-NEXT:    fcvtl v1.4s, v0.4h
-; LSE-NEXT:    ldr s0, [x0]
-; LSE-NEXT:  .LBB0_1: // %atomicrmw.start
-; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
-; LSE-NEXT:    fcvtl v2.4s, v0.4h
-; LSE-NEXT:    fmov w8, s0
-; LSE-NEXT:    mov w10, w8
-; LSE-NEXT:    fadd v2.4s, v2.4s, v1.4s
-; LSE-NEXT:    fcvtn v2.4h, v2.4s
-; LSE-NEXT:    fmov w9, s2
-; LSE-NEXT:    casal w10, w9, [x0]
-; LSE-NEXT:    fmov s0, w10
-; LSE-NEXT:    cmp w10, w8
-; LSE-NEXT:    b.ne .LBB0_1
-; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; LSE-NEXT:    ret
-  %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value seq_cst, align 4
-  ret <2 x half> %res
-}
-
-define <2 x float> @test_atomicrmw_fadd_v2f32_align8(ptr addrspace(1) %ptr, <2 x float> %value) #0 {
-; NOLSE-LABEL: test_atomicrmw_fadd_v2f32_align8:
-; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    ldr d1, [x0]
-; NOLSE-NEXT:    b .LBB1_2
-; NOLSE-NEXT:  .LBB1_1: // %atomicrmw.start
-; NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=1
-; NOLSE-NEXT:    fmov d1, x10
-; NOLSE-NEXT:    cmp x10, x9
-; NOLSE-NEXT:    b.eq .LBB1_5
-; NOLSE-NEXT:  .LBB1_2: // %atomicrmw.start
-; NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; NOLSE-NEXT:    // Child Loop BB1_3 Depth 2
-; NOLSE-NEXT:    fadd v2.2s, v1.2s, v0.2s
-; NOLSE-NEXT:    fmov x9, d1
-; NOLSE-NEXT:    fmov x8, d2
-; NOLSE-NEXT:  .LBB1_3: // %atomicrmw.start
-; NOLSE-NEXT:    // Parent Loop BB1_2 Depth=1
-; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; NOLSE-NEXT:    ldaxr x10, [x0]
-; NOLSE-NEXT:    cmp x10, x9
-; NOLSE-NEXT:    b.ne .LBB1_1
-; NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
-; NOLSE-NEXT:    // in Loop: Header=BB1_3 Depth=2
-; NOLSE-NEXT:    stlxr wzr, x8, [x0]
-; NOLSE-NEXT:    cbnz wzr, .LBB1_3
-; NOLSE-NEXT:    b .LBB1_1
-; NOLSE-NEXT:  .LBB1_5: // %atomicrmw.end
-; NOLSE-NEXT:    fmov d0, d1
-; NOLSE-NEXT:    ret
-;
-; LSE-LABEL: test_atomicrmw_fadd_v2f32_align8:
-; LSE:       // %bb.0:
-; LSE-NEXT:    ldr d1, [x0]
-; LSE-NEXT:  .LBB1_1: // %atomicrmw.start
-; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
-; LSE-NEXT:    fadd v2.2s, v1.2s, v0.2s
-; LSE-NEXT:    fmov x8, d1
-; LSE-NEXT:    mov x10, x8
-; LSE-NEXT:    fmov x9, d2
-; LSE-NEXT:    casal x10, x9, [x0]
-; LSE-NEXT:    fmov d1, x10
-; LSE-NEXT:    cmp x10, x8
-; LSE-NEXT:    b.ne .LBB1_1
-; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    fmov d0, d1
-; LSE-NEXT:    ret
-  %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x float> %value seq_cst, align 8
-  ret <2 x float> %res
-}
-
-attributes #0 = { nounwind }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
new file mode 100644
index 00000000000000..f95caf325b197c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
@@ -0,0 +1,706 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefix=NOLSE %s
+; RUN: llc -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefix=LSE %s
+
+define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2:
+; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    fcvt s1, h0
+; NOLSE-NEXT:    ldr h0, [x0]
+; NOLSE-NEXT:    b .LBB0_2
+; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=1
+; NOLSE-NEXT:    fmov s0, w10
+; NOLSE-NEXT:    cmp w10, w9, uxth
+; NOLSE-NEXT:    b.eq .LBB0_5
+; NOLSE-NEXT:  .LBB0_2: // %atomicrmw.start
+; NOLSE-NEXT:    // =>This Loop Header: Depth=1
+; NOLSE-NEXT:    // Child Loop BB0_3 Depth 2
+; NOLSE-NEXT:    fcvt s2, h0
+; NOLSE-NEXT:    fmov w9, s0
+; NOLSE-NEXT:    fadd s2, s2, s1
+; NOLSE-NEXT:    fcvt h2, s2
+; NOLSE-NEXT:    fmov w8, s2
+; NOLSE-NEXT:  .LBB0_3: // %atomicrmw.start
+; NOLSE-NEXT:    // Parent Loop BB0_2 Depth=1
+; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT:    ldaxrh w10, [x0]
+; NOLSE-NEXT:    cmp w10, w9, uxth
+; NOLSE-NEXT:    b.ne .LBB0_1
+; NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB0_3 Depth=2
+; NOLSE-NEXT:    stlxrh wzr, w8, [x0]
+; NOLSE-NEXT:    cbnz wzr, .LBB0_3
+; NOLSE-NEXT:    b .LBB0_1
+; NOLSE-NEXT:  .LBB0_5: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; NOLSE-NEXT:    ret
+;
+; LSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2:
+; LSE:       // %bb.0:
+; LSE-NEXT:    fcvt s1, h0
+; LSE-NEXT:    ldr h0, [x0]
+; LSE-NEXT:  .LBB0_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    fcvt s2, h0
+; LSE-NEXT:    fmov w8, s0
+; LSE-NEXT:    mov w10, w8
+; LSE-NEXT:    fadd s2, s2, s1
+; LSE-NEXT:    fcvt h2, s2
+; LSE-NEXT:    fmov w9, s2
+; LSE-NEXT:    casalh w10, w9, [x0]
+; LSE-NEXT:    fmov s0, w10
+; LSE-NEXT:    cmp w10, w8, uxth
+; LSE-NEXT:    b.ne .LBB0_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; LSE-NEXT:    ret
+  %res = atomicrmw fadd ptr %ptr, half %value seq_cst, align 2
+  ret half %res
+}
+
+define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4:
+; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    fcvt s1, h0
+; NOLSE-NEXT:    ldr h0, [x0]
+; NOLSE-NEXT:    b .LBB1_2
+; NOLSE-NEXT:  .LBB1_1: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=1
+; NOLSE-NEXT:    fmov s0, w10
+; NOLSE-NEXT:    cmp w10, w9, uxth
+; NOLSE-NEXT:    b.eq .LBB1_5
+; NOLSE-NEXT:  .LBB1_2: // %atomicrmw.start
+; NOLSE-NEXT:    // =>This Loop Header: Depth=1
+; NOLSE-NEXT:    // Child Loop BB1_3 Depth 2
+; NOLSE-NEXT:    fcvt s2, h0
+; NOLSE-NEXT:    fmov w9, s0
+; NOLSE-NEXT:    fadd s2, s2, s1
+; NOLSE-NEXT:    fcvt h2, s2
+; NOLSE-NEXT:    fmov w8, s2
+; NOLSE-NEXT:  .LBB1_3: // %atomicrmw.start
+; NOLSE-NEXT:    // Parent Loop BB1_2 Depth=1
+; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT:    ldaxrh w10, [x0]
+; NOLSE-NEXT:    cmp w10, w9, uxth
+; NOLSE-NEXT:    b.ne .LBB1_1
+; NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB1_3 Depth=2
+; NOLSE-NEXT:    stlxrh wzr, w8, [x0]
+; NOLSE-NEXT:    cbnz wzr, .LBB1_3
+; NOLSE-NEXT:    b .LBB1_1
+; NOLSE-NEXT:  .LBB1_5: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; NOLSE-NEXT:    ret
+;
+; LSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4:
+; LSE:       // %bb.0:
+; LSE-NEXT:    fcvt s1, h0
+; LSE-NEXT:    ldr h0, [x0]
+; LSE-NEXT:  .LBB1_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    fcvt s2, h0
+; LSE-NEXT:    fmov w8, s0
+; LSE-NEXT:    mov w10, w8
+; LSE-NEXT:    fadd s2, s2, s1
+; LSE-NEXT:    fcvt h2, s2
+; LSE-NEXT:    fmov w9, s2
+; LSE-NEXT:    casalh w10, w9, [x0]
+; LSE-NEXT:    fmov s0, w10
+; LSE-NEXT:    cmp w10, w8, uxth
+; LSE-NEXT:    b.ne .LBB1_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; LSE-NEXT:    ret
+  %res = atomicrmw fadd ptr %ptr, half %value seq_cst, align 4
+  ret half %res
+}
+
+define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
+; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
+; NOLSE-NEXT:    fmov w9, s0
+; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
+; NOLSE-NEXT:    ldr h0, [x0]
+; NOLSE-NEXT:    lsl w9, w9, #16
+; NOLSE-NEXT:    fmov s1, w9
+; NOLSE-NEXT:    b .LBB2_2
+; NOLSE-NEXT:  .LBB2_1: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=1
+; NOLSE-NEXT:    fmov s0, w11
+; NOLSE-NEXT:    cmp w11, w9, uxth
+; NOLSE-NEXT:    b.eq .LBB2_5
+; NOLSE-NEXT:  .LBB2_2: // %atomicrmw.start
+; NOLSE-NEXT:    // =>This Loop Header: Depth=1
+; NOLSE-NEXT:    // Child Loop BB2_3 Depth 2
+; NOLSE-NEXT:    fmov w9, s0
+; NOLSE-NEXT:    lsl w9, w9, #16
+; NOLSE-NEXT:    fmov s2, w9
+; NOLSE-NEXT:    fadd s2, s2, s1
+; NOLSE-NEXT:    fmov w9, s2
+; NOLSE-NEXT:    ubfx w10, w9, #16, #1
+; NOLSE-NEXT:    add w9, w9, w8
+; NOLSE-NEXT:    add w9, w10, w9
+; NOLSE-NEXT:    lsr w9, w9, #16
+; NOLSE-NEXT:    fmov s2, w9
+; NOLSE-NEXT:    fmov w9, s0
+; NOLSE-NEXT:    fmov w10, s2
+; NOLSE-NEXT:  .LBB2_3: // %atomicrmw.start
+; NOLSE-NEXT:    // Parent Loop BB2_2 Depth=1
+; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT:    ldaxrh w11, [x0]
+; NOLSE-NEXT:    cmp w11, w9, uxth
+; NOLSE-NEXT:    b.ne .LBB2_1
+; NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB2_3 Depth=2
+; NOLSE-NEXT:    stlxrh wzr, w10, [x0]
+; NOLSE-NEXT:    cbnz wzr, .LBB2_3
+; NOLSE-NEXT:    b .LBB2_1
+; NOLSE-NEXT:  .LBB2_5: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; NOLSE-NEXT:    ret
+;
+; LSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
+; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
+; LSE-NEXT:    fmov w9, s0
+; LSE-NEXT:    mov w8, #32767 // =0x7fff
+; LSE-NEXT:    ldr h0, [x0]
+; LSE-NEXT:    lsl w9, w9, #16
+; LSE-NEXT:    fmov s1, w9
+; LSE-NEXT:  .LBB2_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    fmov w9, s0
+; LSE-NEXT:    lsl w9, w9, #16
+; LSE-NEXT:    fmov s2, w9
+; LSE-NEXT:    fadd s2, s2, s1
+; LSE-NEXT:    fmov w9, s2
+; LSE-NEXT:    ubfx w10, w9, #16, #1
+; LSE-NEXT:    add w9, w9, w8
+; LSE-NEXT:    add w9, w10, w9
+; LSE-NEXT:    fmov w10, s0
+; LSE-NEXT:    lsr w9, w9, #16
+; LSE-NEXT:    mov w11, w10
+; LSE-NEXT:    casalh w11, w9, [x0]
+; LSE-NEXT:    fmov s0, w11
+; LSE-NEXT:    cmp w11, w10, uxth
+; LSE-NEXT:    b.ne .LBB2_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; LSE-NEXT:    ret
+  %res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 2
+  ret bfloat %res
+}
+
+define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
+; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
+; NOLSE-NEXT:    fmov w9, s0
+; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
+; NOLSE-NEXT:    ldr h0, [x0]
+; NOLSE-NEXT:    lsl w9, w9, #16
+; NOLSE-NEXT:    fmov s1, w9
+; NOLSE-NEXT:    b .LBB3_2
+; NOLSE-NEXT:  .LBB3_1: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=1
+; NOLSE-NEXT:    fmov s0, w11
+; NOLSE-NEXT:    cmp w11, w9, uxth
+; NOLSE-NEXT:    b.eq .LBB3_5
+; NOLSE-NEXT:  .LBB3_2: // %atomicrmw.start
+; NOLSE-NEXT:    // =>This Loop Header: Depth=1
+; NOLSE-NEXT:    // Child Loop BB3_3 Depth 2
+; NOLSE-NEXT:    fmov w9, s0
+; NOLSE-NEXT:    lsl w9, w9, #16
+; NOLSE-NEXT:    fmov s2, w9
+; NOLSE-NEXT:    fadd s2, s2, s1
+; NOLSE-NEXT:    fmov w9, s2
+; NOLSE-NEXT:    ubfx w10, w9, #16, #1
+; NOLSE-NEXT:    add w9, w9, w8
+; NOLSE-NEXT:    add w9, w10, w9
+; NOLSE-NEXT:    lsr w9, w9, #16
+; NOLSE-NEXT:    fmov s2, w9
+; NOLSE-NEXT:    fmov w9, s0
+; NOLSE-NEXT:    fmov w10, s2
+; NOLSE-NEXT:  .LBB3_3: // %atomicrmw.start
+; NOLSE-NEXT:    // Parent Loop BB3_2 Depth=1
+; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT:    ldaxrh w11, [x0]
+; NOLSE-NEXT:    cmp w11, w9, uxth
+; NOLSE-NEXT:    b.ne .LBB3_1
+; NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB3_3 Depth=2
+; NOLSE-NEXT:    stlxrh wzr, w10, [x0]
+; NOLSE-NEXT:    cbnz wzr, .LBB3_3
+; NOLSE-NEXT:    b .LBB3_1
+; NOLSE-NEXT:  .LBB3_5: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; NOLSE-NEXT:    ret
+;
+; LSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
+; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
+; LSE-NEXT:    fmov w9, s0
+; LSE-NEXT:    mov w8, #32767 // =0x7fff
+; LSE-NEXT:    ldr h0, [x0]
+; LSE-NEXT:    lsl w9, w9, #16
+; LSE-NEXT:    fmov s1, w9
+; LSE-NEXT:  .LBB3_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    fmov w9, s0
+; LSE-NEXT:    lsl w9, w9, #16
+; LSE-NEXT:    fmov s2, w9
+; LSE-NEXT:    fadd s2, s2, s1
+; LSE-NEXT:    fmov w9, s2
+; LSE-NEXT:    ubfx w10, w9, #16, #1
+; LSE-NEXT:    add w9, w9, w8
+; LSE-NEXT:    add w9, w10, w9
+; LSE-NEXT:    fmov w10, s0
+; LSE-NEXT:    lsr w9, w9, #16
+; LSE-NEXT:    mov w11, w10
+; LSE-NEXT:    casalh w11, w9, [x0]
+; LSE-NEXT:    fmov s0, w11
+; LSE-NEXT:    cmp w11, w10, uxth
+; LSE-NEXT:    b.ne .LBB3_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; LSE-NEXT:    ret
+  %res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 4
+  ret bfloat %res
+}
+
+define float @test_atomicrmw_fadd_f32_seq_cst_align4(ptr %ptr, float %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align4:
+; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    ldr s1, [x0]
+; NOLSE-NEXT:    b .LBB4_2
+; NOLSE-NEXT:  .LBB4_1: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=1
+; NOLSE-NEXT:    fmov s1, w10
+; NOLSE-NEXT:    cmp w10, w9
+; NOLSE-NEXT:    b.eq .LBB4_5
+; NOLSE-NEXT:  .LBB4_2: // %atomicrmw.start
+; NOLSE-NEXT:    // =>This Loop Header: Depth=1
+; NOLSE-NEXT:    // Child Loop BB4_3 Depth 2
+; NOLSE-NEXT:    fadd s2, s1, s0
+; NOLSE-NEXT:    fmov w9, s1
+; NOLSE-NEXT:    fmov w8, s2
+; NOLSE-NEXT:  .LBB4_3: // %atomicrmw.start
+; NOLSE-NEXT:    // Parent Loop BB4_2 Depth=1
+; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT:    ldaxr w10, [x0]
+; NOLSE-NEXT:    cmp w10, w9
+; NOLSE-NEXT:    b.ne .LBB4_1
+; NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB4_3 Depth=2
+; NOLSE-NEXT:    stlxr wzr, w8, [x0]
+; NOLSE-NEXT:    cbnz wzr, .LBB4_3
+; NOLSE-NEXT:    b .LBB4_1
+; NOLSE-NEXT:  .LBB4_5: // %atomicrmw.end
+; NOLSE-NEXT:    fmov s0, s1
+; NOLSE-NEXT:    ret
+;
+; LSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align4:
+; LSE:       // %bb.0:
+; LSE-NEXT:    ldr s1, [x0]
+; LSE-NEXT:  .LBB4_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    fadd s2, s1, s0
+; LSE-NEXT:    fmov w8, s1
+; LSE-NEXT:    mov w10, w8
+; LSE-NEXT:    fmov w9, s2
+; LSE-NEXT:    casal w10, w9, [x0]
+; LSE-NEXT:    fmov s1, w10
+; LSE-NEXT:    cmp w10, w8
+; LSE-NEXT:    b.ne .LBB4_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    fmov s0, s1
+; LSE-NEXT:    ret
+  %res = atomicrmw fadd ptr %ptr, float %value seq_cst, align 4
+  ret float %res
+}
+
+define double @test_atomicrmw_fadd_f32_seq_cst_align8(ptr %ptr, double %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align8:
+; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    ldr d1, [x0]
+; NOLSE-NEXT:    b .LBB5_2
+; NOLSE-NEXT:  .LBB5_1: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=1
+; NOLSE-NEXT:    fmov d1, x10
+; NOLSE-NEXT:    cmp x10, x9
+; NOLSE-NEXT:    b.eq .LBB5_5
+; NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
+; NOLSE-NEXT:    // =>This Loop Header: Depth=1
+; NOLSE-NEXT:    // Child Loop BB5_3 Depth 2
+; NOLSE-NEXT:    fadd d2, d1, d0
+; NOLSE-NEXT:    fmov x9, d1
+; NOLSE-NEXT:    fmov x8, d2
+; NOLSE-NEXT:  .LBB5_3: // %atomicrmw.start
+; NOLSE-NEXT:    // Parent Loop BB5_2 Depth=1
+; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT:    ldaxr x10, [x0]
+; NOLSE-NEXT:    cmp x10, x9
+; NOLSE-NEXT:    b.ne .LBB5_1
+; NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB5_3 Depth=2
+; NOLSE-NEXT:    stlxr wzr, x8, [x0]
+; NOLSE-NEXT:    cbnz wzr, .LBB5_3
+; NOLSE-NEXT:    b .LBB5_1
+; NOLSE-NEXT:  .LBB5_5: // %atomicrmw.end
+; NOLSE-NEXT:    fmov d0, d1
+; NOLSE-NEXT:    ret
+;
+; LSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    ldr d1, [x0]
+; LSE-NEXT:  .LBB5_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    fadd d2, d1, d0
+; LSE-NEXT:    fmov x8, d1
+; LSE-NEXT:    mov x10, x8
+; LSE-NEXT:    fmov x9, d2
+; LSE-NEXT:    casal x10, x9, [x0]
+; LSE-NEXT:    fmov d1, x10
+; LSE-NEXT:    cmp x10, x8
+; LSE-NEXT:    b.ne .LBB5_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    fmov d0, d1
+; LSE-NEXT:    ret
+  %res = atomicrmw fadd ptr %ptr, double %value seq_cst, align 8
+  ret double %res
+}
+
+define fp128 @test_atomicrmw_fadd_f32_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align16:
+; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    sub sp, sp, #96
+; NOLSE-NEXT:    ldr q1, [x0]
+; NOLSE-NEXT:    stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; NOLSE-NEXT:    mov x19, x0
+; NOLSE-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NOLSE-NEXT:    b .LBB6_2
+; NOLSE-NEXT:  .LBB6_1: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB6_2 Depth=1
+; NOLSE-NEXT:    stp x12, x13, [sp, #32]
+; NOLSE-NEXT:    cmp x13, x10
+; NOLSE-NEXT:    ldr q1, [sp, #32]
+; NOLSE-NEXT:    ccmp x12, x11, #0, eq
+; NOLSE-NEXT:    b.eq .LBB6_6
+; NOLSE-NEXT:  .LBB6_2: // %atomicrmw.start
+; NOLSE-NEXT:    // =>This Loop Header: Depth=1
+; NOLSE-NEXT:    // Child Loop BB6_3 Depth 2
+; NOLSE-NEXT:    mov v0.16b, v1.16b
+; NOLSE-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; NOLSE-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NOLSE-NEXT:    bl __addtf3
+; NOLSE-NEXT:    str q0, [sp, #48]
+; NOLSE-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; NOLSE-NEXT:    ldp x9, x8, [sp, #48]
+; NOLSE-NEXT:    str q0, [sp, #64]
+; NOLSE-NEXT:    ldp x11, x10, [sp, #64]
+; NOLSE-NEXT:  .LBB6_3: // %atomicrmw.start
+; NOLSE-NEXT:    // Parent Loop BB6_2 Depth=1
+; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT:    ldaxp x12, x13, [x19]
+; NOLSE-NEXT:    cmp x12, x11
+; NOLSE-NEXT:    cset w14, ne
+; NOLSE-NEXT:    cmp x13, x10
+; NOLSE-NEXT:    cinc w14, w14, ne
+; NOLSE-NEXT:    cbz w14, .LBB6_5
+; NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB6_3 Depth=2
+; NOLSE-NEXT:    stlxp w14, x12, x13, [x19]
+; NOLSE-NEXT:    cbnz w14, .LBB6_3
+; NOLSE-NEXT:    b .LBB6_1
+; NOLSE-NEXT:  .LBB6_5: // %atomicrmw.start
+; NOLSE-NEXT:    // in Loop: Header=BB6_3 Depth...
[truncated]

davemgreen · 2024-08-14T08:18:01Z

We have a decent number of atomic tests in llvm/test/CodeGen/AArch64/Atomics, but I don't believe they include fp operations yet.

arsenm · 2024-08-14T11:27:37Z

> We have a decent number of atomic tests in llvm/test/CodeGen/AArch64/Atomics, but I don't believe they include fp operations yet.

Grep doesn't find anything

tmatheson-arm · 2024-08-15T13:27:09Z

> > We have a decent number of atomic tests in llvm/test/CodeGen/AArch64/Atomics, but I don't believe they include fp operations yet.
>
> Grep doesn't find anything

Yeah that was a TODO item I never got around to.
All of the other tests are generated by https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py

arsenm · 2024-08-22T11:31:59Z

ping

efriedma-quic

LGTM

arsenm

I have no idea why the windows bot keeps failing on the fmin and fmax tests. The run lines are all using explicit, full triples

arsenm · 2024-08-29T09:27:04Z

> I have no idea why the windows bot keeps failing on the fmin and fmax tests. The run lines are all using explicit, full triples

They seem to be getting different offsets for temporary stack objects. Linux/Mac output has the stack store after the call at offset 48, and the windows bot gets offset 64.

; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
; NOLSE-NEXT: // =>This Loop Header: Depth=1
; NOLSE-NEXT: // Child Loop BB6_3 Depth 2
; NOLSE-NEXT: mov v0.16b, v1.16b
; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; NOLSE-NEXT: bl fminl
; NOLSE-NEXT: str q0, [sp, #48]
; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; NOLSE-NEXT: ldp x9, x8, [sp, #48]
; NOLSE-NEXT: str q0, [sp, #64]
; NOLSE-NEXT: ldp x11, x10, [sp, #64]

Windows bot:

ldr q1, [sp] // 16-byte Folded Reload
bl fminl
str q0, [sp, #64]
ldr q0, [sp, #16] // 16-byte Folded Reload
ldp x9, x8, [sp, #64]
str q0, [sp, #48]

ldp x11, x10, [sp, #48]
.LBB6_3: // %atomicrmw.start

arsenm · 2024-08-29T15:32:03Z

Merge activity

Aug 29, 11:32 AM EDT: @arsenm started a stack merge that includes this pull request via Graphite.
Aug 29, 11:34 AM EDT: Graphite rebased this pull request as part of a merge.
Aug 29, 11:36 AM EDT: @arsenm merged this pull request with Graphite.

There were only codegen tests for the fadd vector case, so round out the test coverage for the scalar cases and all the other operations.

llvm-ci · 2024-08-29T15:45:46Z

LLVM Buildbot has detected a new failure on builder ml-opt-rel-x86-64 running on ml-opt-rel-x86-64-b2 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/4252

Here is the relevant piece of the build log for the reference

Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: CodeGen/AArch64/atomicrmw-fmin.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/ml-opt-rel-x86-64-b1/build/bin/llc -mtriple=aarch64-linux-gnu -O1 -fast-isel=0 -global-isel=false /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll -o - | /b/ml-opt-rel-x86-64-b1/build/bin/FileCheck -check-prefix=NOLSE /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
+ /b/ml-opt-rel-x86-64-b1/build/bin/llc -mtriple=aarch64-linux-gnu -O1 -fast-isel=0 -global-isel=false /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll -o -
+ /b/ml-opt-rel-x86-64-b1/build/bin/FileCheck -check-prefix=NOLSE /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
/b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll:636:15: error: NOLSE-NEXT: is not on the line after the previous match
; NOLSE-NEXT: str q0, [sp, #48]
              ^
<stdin>:275:2: note: 'next' match was here
 str q0, [sp, #48]
 ^
<stdin>:271:10: note: previous match ended here
 bl fminl
         ^
<stdin>:272:1: note: non-matching line after previous match is here
 str q0, [sp, #64]
^

Input file: <stdin>
Check file: /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
          .
          .
          .
        270:  ldr q1, [sp] // 16-byte Folded Reload 
        271:  bl fminl 
        272:  str q0, [sp, #64] 
        273:  ldr q0, [sp, #16] // 16-byte Folded Reload 
        274:  ldp x9, x8, [sp, #64] 
        275:  str q0, [sp, #48] 
next:636      !~~~~~~~~~~~~~~~~  error: match on wrong line
        276:  ldp x11, x10, [sp, #48] 
        277: .LBB6_3: // %atomicrmw.start 
        278:  // Parent Loop BB6_2 Depth=1 
        279:  // => This Inner Loop Header: Depth=2 
        280:  ldaxp x12, x13, [x19] 
          .
          .
          .
>>>>>>

--

...

llvm-ci · 2024-08-29T15:48:13Z

LLVM Buildbot has detected a new failure on builder ml-opt-dev-x86-64 running on ml-opt-dev-x86-64-b2 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/4277

Here is the relevant piece of the build log for the reference

Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: CodeGen/AArch64/atomicrmw-fmax.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/ml-opt-dev-x86-64-b1/build/bin/llc -mtriple=aarch64-linux-gnu -O1 -fast-isel=0 -global-isel=false /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll -o - | /b/ml-opt-dev-x86-64-b1/build/bin/FileCheck -check-prefix=NOLSE /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
+ /b/ml-opt-dev-x86-64-b1/build/bin/FileCheck -check-prefix=NOLSE /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
+ /b/ml-opt-dev-x86-64-b1/build/bin/llc -mtriple=aarch64-linux-gnu -O1 -fast-isel=0 -global-isel=false /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll -o -
/b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll:636:15: error: NOLSE-NEXT: is not on the line after the previous match
; NOLSE-NEXT: str q0, [sp, #48]
              ^
<stdin>:275:2: note: 'next' match was here
 str q0, [sp, #48]
 ^
<stdin>:271:10: note: previous match ended here
 bl fmaxl
         ^
<stdin>:272:1: note: non-matching line after previous match is here
 str q0, [sp, #64]
^

Input file: <stdin>
Check file: /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
          .
          .
          .
        270:  ldr q1, [sp] // 16-byte Folded Reload 
        271:  bl fmaxl 
        272:  str q0, [sp, #64] 
        273:  ldr q0, [sp, #16] // 16-byte Folded Reload 
        274:  ldp x9, x8, [sp, #64] 
        275:  str q0, [sp, #48] 
next:636      !~~~~~~~~~~~~~~~~  error: match on wrong line
        276:  ldp x11, x10, [sp, #48] 
        277: .LBB6_3: // %atomicrmw.start 
        278:  // Parent Loop BB6_2 Depth=1 
        279:  // => This Inner Loop Header: Depth=2 
        280:  ldaxp x12, x13, [x19] 
          .
          .
          .
>>>>>>

--

...

llvm-ci · 2024-08-29T15:48:52Z

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-gcc-ubuntu running on sie-linux-worker3 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/4162

Here is the relevant piece of the build log for the reference

Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: CodeGen/AArch64/atomicrmw-fmin.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/bin/llc -mtriple=aarch64-linux-gnu -O1 -fast-isel=0 -global-isel=false /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll -o - | /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/bin/FileCheck -check-prefix=NOLSE /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
+ /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/bin/FileCheck -check-prefix=NOLSE /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
+ /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/bin/llc -mtriple=aarch64-linux-gnu -O1 -fast-isel=0 -global-isel=false /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll -o -
/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll:636:15: error: NOLSE-NEXT: is not on the line after the previous match
; NOLSE-NEXT: str q0, [sp, #48]
              ^
<stdin>:275:2: note: 'next' match was here
 str q0, [sp, #48]
 ^
<stdin>:271:10: note: previous match ended here
 bl fminl
         ^
<stdin>:272:1: note: non-matching line after previous match is here
 str q0, [sp, #64]
^

Input file: <stdin>
Check file: /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
              1:  .text
              2:  .file "atomicrmw-fmin.ll"
              3:  .globl test_atomicrmw_fmin_f16_seq_cst_align2 // -- Begin function test_atomicrmw_fmin_f16_seq_cst_align2
              4:  .p2align 2
              5:  .type test_atomicrmw_fmin_f16_seq_cst_align2,@function
              6: test_atomicrmw_fmin_f16_seq_cst_align2: // @test_atomicrmw_fmin_f16_seq_cst_align2
label:10'0       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
label:10'1       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
              7: // %bb.0:
check:11         ^~~~~~~~~
              8:  fcvt s1, h0
next:12           ^~~~~~~~~~~
              9:  ldr h0, [x0]
next:13           ^~~~~~~~~~~~
             10:  b .LBB0_2
next:14           ^~~~~~~~~
             11: .LBB0_1: // %atomicrmw.start
next:15          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
             12:  // in Loop: Header=BB0_2 Depth=1
next:16           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             13:  fmov s0, w10
next:17           ^~~~~~~~~~~~
...

llvm-ci · 2024-08-29T17:39:35Z

LLVM Buildbot has detected a new failure on builder lld-x86_64-ubuntu-fast running on as-builder-4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/2049

Here is the relevant piece of the build log for the reference

Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: CodeGen/AArch64/atomicrmw-fmin.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/llc -mtriple=aarch64-linux-gnu -O1 -fast-isel=0 -global-isel=false /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll -o - | /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/FileCheck -check-prefix=NOLSE /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
+ /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/llc -mtriple=aarch64-linux-gnu -O1 -fast-isel=0 -global-isel=false /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll -o -
+ /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/FileCheck -check-prefix=NOLSE /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
/home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll:636:15: error: NOLSE-NEXT: is not on the line after the previous match
; NOLSE-NEXT: str q0, [sp, #48]
              ^
<stdin>:275:2: note: 'next' match was here
 str q0, [sp, #48]
 ^
<stdin>:271:10: note: previous match ended here
 bl fminl
         ^
<stdin>:272:1: note: non-matching line after previous match is here
 str q0, [sp, #64]
^

Input file: <stdin>
Check file: /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
          .
          .
          .
        270:  ldr q1, [sp] // 16-byte Folded Reload 
        271:  bl fminl 
        272:  str q0, [sp, #64] 
        273:  ldr q0, [sp, #16] // 16-byte Folded Reload 
        274:  ldp x9, x8, [sp, #64] 
        275:  str q0, [sp, #48] 
next:636      !~~~~~~~~~~~~~~~~  error: match on wrong line
        276:  ldp x11, x10, [sp, #48] 
        277: .LBB6_3: // %atomicrmw.start 
        278:  // Parent Loop BB6_2 Depth=1 
        279:  // => This Inner Loop Header: Depth=2 
        280:  ldaxp x12, x13, [x19] 
          .
          .
          .
>>>>>>

--

...

arsenm mentioned this pull request Aug 14, 2024

AArch64: Use consistent atomicrmw expansion for FP operations #103702

Merged

arsenm added the backend:AArch64 label Aug 14, 2024 — with Graphite App

arsenm requested review from aemerson, davemgreen, efriedma-quic, ornata and TNorthover August 14, 2024 06:46

arsenm marked this pull request as ready for review August 14, 2024 06:46

davemgreen requested a review from tmatheson-arm August 14, 2024 08:17

arsenm force-pushed the users/arsenm/aarch64-add-atomicrmw-fp-tests branch 3 times, most recently from 38ce457 to 07be231 Compare August 19, 2024 20:14

efriedma-quic approved these changes Aug 22, 2024

View reviewed changes

arsenm force-pushed the users/arsenm/aarch64-add-atomicrmw-fp-tests branch 4 times, most recently from 07ad2a3 to eb21c2b Compare August 29, 2024 05:20

arsenm commented Aug 29, 2024

View reviewed changes

arsenm force-pushed the users/arsenm/aarch64-add-atomicrmw-fp-tests branch from eb21c2b to 9c924de Compare August 29, 2024 09:50

arsenm added 3 commits August 29, 2024 15:34

AArch64: Add tests for atomicrmw fp operations

5fee612

There were only codegen tests for the fadd vector case, so round out the test coverage for the scalar cases and all the other operations.

Add softfp run lines

289d140

Try to use explicit triples to fix windows bot failure

ff1a79d

arsenm added 2 commits August 29, 2024 15:34

Try more triples to fix windows host failures

fd8d402

Work around windows test failures

03949db

arsenm force-pushed the users/arsenm/aarch64-add-atomicrmw-fp-tests branch from 9c924de to 03949db Compare August 29, 2024 15:34

arsenm merged commit 4ee2ad2 into main Aug 29, 2024
4 of 5 checks passed

arsenm deleted the users/arsenm/aarch64-add-atomicrmw-fp-tests branch August 29, 2024 15:36

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

AArch64: Add tests for atomicrmw fp operations #103701

AArch64: Add tests for atomicrmw fp operations #103701

arsenm commented Aug 14, 2024

arsenm commented Aug 14, 2024

llvmbot commented Aug 14, 2024

davemgreen commented Aug 14, 2024

arsenm commented Aug 14, 2024

tmatheson-arm commented Aug 15, 2024 •

edited

Loading

arsenm commented Aug 22, 2024

efriedma-quic left a comment

arsenm left a comment

arsenm commented Aug 29, 2024

arsenm commented Aug 29, 2024 •

edited

Loading

llvm-ci commented Aug 29, 2024

llvm-ci commented Aug 29, 2024

llvm-ci commented Aug 29, 2024

llvm-ci commented Aug 29, 2024

AArch64: Add tests for atomicrmw fp operations #103701

AArch64: Add tests for atomicrmw fp operations #103701

Conversation

arsenm commented Aug 14, 2024

arsenm commented Aug 14, 2024

llvmbot commented Aug 14, 2024

davemgreen commented Aug 14, 2024

arsenm commented Aug 14, 2024

tmatheson-arm commented Aug 15, 2024 • edited Loading

arsenm commented Aug 22, 2024

efriedma-quic left a comment

Choose a reason for hiding this comment

arsenm left a comment

Choose a reason for hiding this comment

arsenm commented Aug 29, 2024

arsenm commented Aug 29, 2024 • edited Loading

Merge activity

llvm-ci commented Aug 29, 2024

llvm-ci commented Aug 29, 2024

llvm-ci commented Aug 29, 2024

llvm-ci commented Aug 29, 2024

tmatheson-arm commented Aug 15, 2024 •

edited

Loading

arsenm commented Aug 29, 2024 •

edited

Loading