Skip to content

Conversation

@RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Dec 10, 2025

Based off feedback from #171478

…iginally legal f64 values that we can store directly.

Based off feedback from llvm#171478
@llvmbot
Copy link
Member

llvmbot commented Dec 10, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Based off feedback from #171478


Patch is 63.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171602.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+8-1)
  • (modified) llvm/test/CodeGen/X86/atomic-fp.ll (+204-464)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fbd875a93fd4a..ebb9119362bc6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33162,7 +33162,14 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
     // For illegal i64 atomic_stores, we can try to use MOVQ or MOVLPS if SSE
     // is enabled.
     if (VT == MVT::i64) {
-      if (Subtarget.hasSSE1()) {
+      SDValue BCValue = peekThroughBitcasts(Node->getVal());
+      if (BCValue.getValueType() == MVT::f64 &&
+          (Subtarget.hasX87() || Subtarget.hasSSE2())) {
+        // If the i64 was bitcast from a f64 then we can do the f64 atomic store
+        // directly with FSTPL/MOVSD.
+        Chain = DAG.getStore(Node->getChain(), dl, BCValue, Node->getBasePtr(),
+                             Node->getMemOperand());
+      } else if (Subtarget.hasSSE1()) {
         SDValue SclToVec =
             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Node->getVal());
         MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
diff --git a/llvm/test/CodeGen/X86/atomic-fp.ll b/llvm/test/CodeGen/X86/atomic-fp.ll
index fe79dfe39f645..2dee1d12e7255 100644
--- a/llvm/test/CodeGen/X86/atomic-fp.ll
+++ b/llvm/test/CodeGen/X86/atomic-fp.ll
@@ -80,23 +80,17 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
 ; X86-NOSSE-NEXT:    pushl %ebp
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
 ; X86-NOSSE-NEXT:    andl $-8, %esp
-; X86-NOSSE-NEXT:    subl $32, %esp
+; X86-NOSSE-NEXT:    subl $24, %esp
 ; X86-NOSSE-NEXT:    movl 8(%ebp), %eax
 ; X86-NOSSE-NEXT:    fildll (%eax)
 ; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    faddl 12(%ebp)
-; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOSSE-NEXT:    movl %ecx, (%esp)
-; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fildll (%esp)
-; X86-NOSSE-NEXT:    fistpll (%eax)
+; X86-NOSSE-NEXT:    fldl (%esp)
+; X86-NOSSE-NEXT:    faddl 12(%ebp)
+; X86-NOSSE-NEXT:    fstpl (%eax)
 ; X86-NOSSE-NEXT:    movl %ebp, %esp
 ; X86-NOSSE-NEXT:    popl %ebp
 ; X86-NOSSE-NEXT:    retl
@@ -109,16 +103,13 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
 ; X86-SSE1-NEXT:    subl $16, %esp
 ; X86-SSE1-NEXT:    movl 8(%ebp), %eax
 ; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
-; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
-; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT:    movss %xmm1, (%esp)
-; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT:    movss %xmm0, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    fldl (%esp)
 ; X86-SSE1-NEXT:    faddl 12(%ebp)
-; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
+; X86-SSE1-NEXT:    fstpl (%eax)
 ; X86-SSE1-NEXT:    movl %ebp, %esp
 ; X86-SSE1-NEXT:    popl %ebp
 ; X86-SSE1-NEXT:    retl
@@ -132,9 +123,7 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
 ; X86-SSE2-NEXT:    movl 8(%ebp), %eax
 ; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; X86-SSE2-NEXT:    addsd 12(%ebp), %xmm0
-; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
+; X86-SSE2-NEXT:    movsd %xmm0, (%eax)
 ; X86-SSE2-NEXT:    movl %ebp, %esp
 ; X86-SSE2-NEXT:    popl %ebp
 ; X86-SSE2-NEXT:    retl
@@ -148,9 +137,7 @@ define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
 ; X86-AVX-NEXT:    movl 8(%ebp), %eax
 ; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; X86-AVX-NEXT:    vaddsd 12(%ebp), %xmm0, %xmm0
-; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
+; X86-AVX-NEXT:    vmovsd %xmm0, (%eax)
 ; X86-AVX-NEXT:    movl %ebp, %esp
 ; X86-AVX-NEXT:    popl %ebp
 ; X86-AVX-NEXT:    retl
@@ -246,22 +233,16 @@ define dso_local void @fadd_64g() nounwind {
 ; X86-NOSSE-NEXT:    pushl %ebp
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
 ; X86-NOSSE-NEXT:    andl $-8, %esp
-; X86-NOSSE-NEXT:    subl $32, %esp
+; X86-NOSSE-NEXT:    subl $24, %esp
 ; X86-NOSSE-NEXT:    fildll glob64
 ; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fld1
-; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NOSSE-NEXT:    movl %eax, (%esp)
-; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fildll (%esp)
-; X86-NOSSE-NEXT:    fistpll glob64
+; X86-NOSSE-NEXT:    fld1
+; X86-NOSSE-NEXT:    faddl (%esp)
+; X86-NOSSE-NEXT:    fstpl glob64
 ; X86-NOSSE-NEXT:    movl %ebp, %esp
 ; X86-NOSSE-NEXT:    popl %ebp
 ; X86-NOSSE-NEXT:    retl
@@ -273,16 +254,13 @@ define dso_local void @fadd_64g() nounwind {
 ; X86-SSE1-NEXT:    andl $-8, %esp
 ; X86-SSE1-NEXT:    subl $16, %esp
 ; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
-; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
-; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT:    movss %xmm1, (%esp)
-; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT:    movss %xmm0, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    fld1
 ; X86-SSE1-NEXT:    faddl (%esp)
-; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT:    movlps %xmm0, glob64
+; X86-SSE1-NEXT:    fstpl glob64
 ; X86-SSE1-NEXT:    movl %ebp, %esp
 ; X86-SSE1-NEXT:    popl %ebp
 ; X86-SSE1-NEXT:    retl
@@ -295,9 +273,7 @@ define dso_local void @fadd_64g() nounwind {
 ; X86-SSE2-NEXT:    subl $8, %esp
 ; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT:    movlps %xmm0, glob64
+; X86-SSE2-NEXT:    movsd %xmm0, glob64
 ; X86-SSE2-NEXT:    movl %ebp, %esp
 ; X86-SSE2-NEXT:    popl %ebp
 ; X86-SSE2-NEXT:    retl
@@ -310,9 +286,7 @@ define dso_local void @fadd_64g() nounwind {
 ; X86-AVX-NEXT:    subl $8, %esp
 ; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT:    vmovlps %xmm0, glob64
+; X86-AVX-NEXT:    vmovsd %xmm0, glob64
 ; X86-AVX-NEXT:    movl %ebp, %esp
 ; X86-AVX-NEXT:    popl %ebp
 ; X86-AVX-NEXT:    retl
@@ -409,22 +383,16 @@ define dso_local void @fadd_64imm() nounwind {
 ; X86-NOSSE-NEXT:    pushl %ebp
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
 ; X86-NOSSE-NEXT:    andl $-8, %esp
-; X86-NOSSE-NEXT:    subl $32, %esp
+; X86-NOSSE-NEXT:    subl $24, %esp
 ; X86-NOSSE-NEXT:    fildll -559038737
 ; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fld1
-; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NOSSE-NEXT:    movl %eax, (%esp)
-; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fildll (%esp)
-; X86-NOSSE-NEXT:    fistpll -559038737
+; X86-NOSSE-NEXT:    fld1
+; X86-NOSSE-NEXT:    faddl (%esp)
+; X86-NOSSE-NEXT:    fstpl -559038737
 ; X86-NOSSE-NEXT:    movl %ebp, %esp
 ; X86-NOSSE-NEXT:    popl %ebp
 ; X86-NOSSE-NEXT:    retl
@@ -436,16 +404,13 @@ define dso_local void @fadd_64imm() nounwind {
 ; X86-SSE1-NEXT:    andl $-8, %esp
 ; X86-SSE1-NEXT:    subl $16, %esp
 ; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
-; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
-; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT:    movss %xmm1, (%esp)
-; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT:    movss %xmm0, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    fld1
 ; X86-SSE1-NEXT:    faddl (%esp)
-; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT:    movlps %xmm0, -559038737
+; X86-SSE1-NEXT:    fstpl -559038737
 ; X86-SSE1-NEXT:    movl %ebp, %esp
 ; X86-SSE1-NEXT:    popl %ebp
 ; X86-SSE1-NEXT:    retl
@@ -458,9 +423,7 @@ define dso_local void @fadd_64imm() nounwind {
 ; X86-SSE2-NEXT:    subl $8, %esp
 ; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT:    movlps %xmm0, -559038737
+; X86-SSE2-NEXT:    movsd %xmm0, -559038737
 ; X86-SSE2-NEXT:    movl %ebp, %esp
 ; X86-SSE2-NEXT:    popl %ebp
 ; X86-SSE2-NEXT:    retl
@@ -473,9 +436,7 @@ define dso_local void @fadd_64imm() nounwind {
 ; X86-AVX-NEXT:    subl $8, %esp
 ; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT:    vmovlps %xmm0, -559038737
+; X86-AVX-NEXT:    vmovsd %xmm0, -559038737
 ; X86-AVX-NEXT:    movl %ebp, %esp
 ; X86-AVX-NEXT:    popl %ebp
 ; X86-AVX-NEXT:    retl
@@ -577,22 +538,16 @@ define dso_local void @fadd_64stack() nounwind {
 ; X86-NOSSE-NEXT:    pushl %ebp
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
 ; X86-NOSSE-NEXT:    andl $-8, %esp
-; X86-NOSSE-NEXT:    subl $40, %esp
+; X86-NOSSE-NEXT:    subl $32, %esp
 ; X86-NOSSE-NEXT:    fildll {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl %eax, (%esp)
 ; X86-NOSSE-NEXT:    fld1
-; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    faddl (%esp)
 ; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT:    movl %eax, (%esp)
-; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fildll (%esp)
-; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl %ebp, %esp
 ; X86-NOSSE-NEXT:    popl %ebp
 ; X86-NOSSE-NEXT:    retl
@@ -604,16 +559,13 @@ define dso_local void @fadd_64stack() nounwind {
 ; X86-SSE1-NEXT:    andl $-8, %esp
 ; X86-SSE1-NEXT:    subl $24, %esp
 ; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
-; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
-; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT:    movss %xmm1, (%esp)
-; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT:    movss %xmm0, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    fld1
 ; X86-SSE1-NEXT:    faddl (%esp)
 ; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    movl %ebp, %esp
 ; X86-SSE1-NEXT:    popl %ebp
 ; X86-SSE1-NEXT:    retl
@@ -627,8 +579,6 @@ define dso_local void @fadd_64stack() nounwind {
 ; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
 ; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
 ; X86-SSE2-NEXT:    movl %ebp, %esp
 ; X86-SSE2-NEXT:    popl %ebp
 ; X86-SSE2-NEXT:    retl
@@ -642,8 +592,6 @@ define dso_local void @fadd_64stack() nounwind {
 ; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
 ; X86-AVX-NEXT:    movl %ebp, %esp
 ; X86-AVX-NEXT:    popl %ebp
 ; X86-AVX-NEXT:    retl
@@ -677,7 +625,7 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
 ; X86-NOSSE-NEXT:    pushl %esi
 ; X86-NOSSE-NEXT:    andl $-8, %esp
-; X86-NOSSE-NEXT:    subl $40, %esp
+; X86-NOSSE-NEXT:    subl $32, %esp
 ; X86-NOSSE-NEXT:    movl 20(%ebp), %eax
 ; X86-NOSSE-NEXT:    movl 8(%ebp), %ecx
 ; X86-NOSSE-NEXT:    fildll (%ecx,%eax,8)
@@ -685,16 +633,10 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    faddl 12(%ebp)
-; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOSSE-NEXT:    movl %edx, (%esp)
-; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fildll (%esp)
-; X86-NOSSE-NEXT:    fistpll (%ecx,%eax,8)
+; X86-NOSSE-NEXT:    fldl (%esp)
+; X86-NOSSE-NEXT:    faddl 12(%ebp)
+; X86-NOSSE-NEXT:    fstpl (%ecx,%eax,8)
 ; X86-NOSSE-NEXT:    leal -4(%ebp), %esp
 ; X86-NOSSE-NEXT:    popl %esi
 ; X86-NOSSE-NEXT:    popl %ebp
@@ -709,16 +651,13 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
 ; X86-SSE1-NEXT:    movl 20(%ebp), %eax
 ; X86-SSE1-NEXT:    movl 8(%ebp), %ecx
 ; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
-; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
-; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT:    movss %xmm1, (%esp)
-; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT:    movss %xmm0, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    fldl (%esp)
 ; X86-SSE1-NEXT:    faddl 12(%ebp)
-; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT:    movlps %xmm0, (%ecx,%eax,8)
+; X86-SSE1-NEXT:    fstpl (%ecx,%eax,8)
 ; X86-SSE1-NEXT:    movl %ebp, %esp
 ; X86-SSE1-NEXT:    popl %ebp
 ; X86-SSE1-NEXT:    retl
@@ -733,9 +672,7 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
 ; X86-SSE2-NEXT:    movl 8(%ebp), %ecx
 ; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; X86-SSE2-NEXT:    addsd 12(%ebp), %xmm0
-; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT:    movlps %xmm0, (%ecx,%eax,8)
+; X86-SSE2-NEXT:    movsd %xmm0, (%ecx,%eax,8)
 ; X86-SSE2-NEXT:    movl %ebp, %esp
 ; X86-SSE2-NEXT:    popl %ebp
 ; X86-SSE2-NEXT:    retl
@@ -750,9 +687,7 @@ define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
 ; X86-AVX-NEXT:    movl 8(%ebp), %ecx
 ; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; X86-AVX-NEXT:    vaddsd 12(%ebp), %xmm0, %xmm0
-; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT:    vmovlps %xmm0, (%ecx,%eax,8)
+; X86-AVX-NEXT:    vmovsd %xmm0, (%ecx,%eax,8)
 ; X86-AVX-NEXT:    movl %ebp, %esp
 ; X86-AVX-NEXT:    popl %ebp
 ; X86-AVX-NEXT:    retl
@@ -852,23 +787,17 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
 ; X86-NOSSE-NEXT:    pushl %ebp
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
 ; X86-NOSSE-NEXT:    andl $-8, %esp
-; X86-NOSSE-NEXT:    subl $32, %esp
+; X86-NOSSE-NEXT:    subl $24, %esp
 ; X86-NOSSE-NEXT:    movl 8(%ebp), %eax
 ; X86-NOSSE-NEXT:    fildll (%eax)
 ; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fsubl 12(%ebp)
-; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOSSE-NEXT:    movl %ecx, (%esp)
-; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fildll (%esp)
-; X86-NOSSE-NEXT:    fistpll (%eax)
+; X86-NOSSE-NEXT:    fldl (%esp)
+; X86-NOSSE-NEXT:    fsubl 12(%ebp)
+; X86-NOSSE-NEXT:    fstpl (%eax)
 ; X86-NOSSE-NEXT:    movl %ebp, %esp
 ; X86-NOSSE-NEXT:    popl %ebp
 ; X86-NOSSE-NEXT:    retl
@@ -881,16 +810,13 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
 ; X86-SSE1-NEXT:    subl $16, %esp
 ; X86-SSE1-NEXT:    movl 8(%ebp), %eax
 ; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
-; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
-; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
-; X86-SSE1-NEXT:    movss %xmm1, (%esp)
-; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT:    movss %xmm0, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    fldl (%esp)
 ; X86-SSE1-NEXT:    fsubl 12(%ebp)
-; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
-; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
+; X86-SSE1-NEXT:    fstpl (%eax)
 ; X86-SSE1-NEXT:    movl %ebp, %esp
 ; X86-SSE1-NEXT:    popl %ebp
 ; X86-SSE1-NEXT:    retl
@@ -904,9 +830,7 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
 ; X86-SSE2-NEXT:    movl 8(%ebp), %eax
 ; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; X86-SSE2-NEXT:    subsd 12(%ebp), %xmm0
-; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
+; X86-SSE2-NEXT:    movsd %xmm0, (%eax)
 ; X86-SSE2-NEXT:    movl %ebp, %esp
 ; X86-SSE2-NEXT:    popl %ebp
 ; X86-SSE2-NEXT:    retl
@@ -920,9 +844,7 @@ define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
 ; X86-AVX-NEXT:    movl 8(%ebp), %eax
 ; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; X86-AVX-NEXT:    vsubsd 12(%ebp), %xmm0, %xmm0
-; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
+; X86-AVX-NEXT:    vmovsd %xmm0, (%eax)
 ; X86-AVX-NEXT:    movl %ebp, %esp
 ; X86-AVX-NEXT:    popl %ebp
 ; X86-AVX-NEXT:    retl
@@ -1018,23 +940,17 @@ define dso_local void @fsub_64g() nounwind {
 ; X86-NOSSE-NEXT:    pushl %ebp
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
 ; X86-NOSSE-NEXT:    andl $-8, %esp
-; X86-NOSSE-NEXT:    subl $32, %esp
+; X86-NOSSE-NEXT...
[truncated]

Copy link
Collaborator

@topperc topperc left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@RKSimon RKSimon enabled auto-merge (squash) December 11, 2025 08:38
@RKSimon RKSimon merged commit 6573f62 into llvm:main Dec 11, 2025
7 of 9 checks passed
@RKSimon RKSimon deleted the x86-atomic-store-i64-f64 branch December 11, 2025 09:08
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants