SLPVectorizer: getSameOpcode operand-bundle check, plus regression test.

What the first hunk changes (inside getSameOpcode):
  The bundle comparison is a three-iterator std::equal. It walks Call's
  bundle operands (start index to end index) and reads the same number of
  operands from CallBase, beginning at CallBase's bundle start index.
  The old condition ran that comparison whenever Call->hasOperandBundles()
  was true, without looking at CallBase at all. The new condition tests
  !CallBase->hasOperandBundles() first; because of || short-circuiting, the
  std::equal is then skipped and the function returns
  InstructionsState(VL[BaseIndex], nullptr, nullptr) straight away.

What the second hunk adds:
  A new test, llvm/test/Transforms/SLPVectorizer/AArch64/uselistorder.ll,
  with two calls to @llvm.maximum.f64. Only the first carries an operand
  bundle ("a_list"). The test's own comment ties it to the crash in #95016.
  The CHECK lines are autogenerated by utils/update_test_checks.py; they
  expect <2 x double> fadd/fmul/select operations and one vector store, and
  contain no call instruction.

NOTE(review): this copy of the patch had its line breaks and indentation
collapsed; the layout below is a reconstruction. Verify whitespace against
the upstream commit before applying.
NOTE(review): "ArrayRef VL" and "cast(IBase)" appear without template
arguments here, presumably stripped during extraction; confirm against the
upstream file.

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 494db0493dacab..e098e0d2f5f5a8 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -835,11 +835,11 @@ static InstructionsState getSameOpcode(ArrayRef VL,
         auto *CallBase = cast(IBase);
         if (Call->getCalledFunction() != CallBase->getCalledFunction())
           return InstructionsState(VL[BaseIndex], nullptr, nullptr);
-        if (Call->hasOperandBundles() &&
+        if (Call->hasOperandBundles() && (!CallBase->hasOperandBundles() ||
             !std::equal(Call->op_begin() + Call->getBundleOperandsStartIndex(),
                         Call->op_begin() + Call->getBundleOperandsEndIndex(),
                         CallBase->op_begin() +
-                            CallBase->getBundleOperandsStartIndex()))
+                            CallBase->getBundleOperandsStartIndex())))
           return InstructionsState(VL[BaseIndex], nullptr, nullptr);
         Intrinsic::ID ID = getVectorIntrinsicIDForCall(Call, &TLI);
         if (ID != BaseID)
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/uselistorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/uselistorder.ll
new file mode 100644
index 00000000000000..3a68a37c9f82ca
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/uselistorder.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=slp-vectorizer -S -pass-remarks-missed=slp-vectorizer 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test has UB but the crash in #95016 only happens with it
+define void @uselistorder_test() {
+; CHECK-LABEL: @uselistorder_test(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double 0.000000e+00, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 0.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> zeroinitializer, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = select <2 x i1> zeroinitializer, <2 x double> zeroinitializer, <2 x double> [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = fadd <2 x double> [[TMP7]], zeroinitializer
+; CHECK-NEXT:    store <2 x double> [[TMP8]], ptr null, align 8
+; CHECK-NEXT:    ret void
+;
+  %max1 = call double @llvm.maximum.f64(double 0.000000e+00, double 0.000000e+00) [ "a_list"(ptr null) ]
+  %add1 = fadd double %max1, 0.000000e+00
+  %mul1 = fmul double 0.000000e+00, %add1
+  %mul2 = fmul double %mul1, 0.000000e+00
+  %sel1 = select i1 false, double 0.000000e+00, double %mul2
+  %max2 = call double @llvm.maximum.f64(double 0.000000e+00, double 0.000000e+00)
+  %add2 = fadd double %max2, 0.000000e+00
+  %mul3 = fmul double 0.000000e+00, %add2
+  %mul4 = fmul double %mul3, 0.000000e+00
+  %sel2 = select i1 false, double 0.000000e+00, double %mul4
+  %mul5 = fmul double %sel2, 0.000000e+00
+  %add3 = fadd double 0.000000e+00, %mul5
+  %gep1 = getelementptr { double, [1 x [2 x double]] }, ptr null, i64 0, i32 1
+  store double %add3, ptr %gep1, align 8
+  %mul6 = fmul double %sel1, 0.000000e+00
+  %add4 = fadd double %mul6, 0.000000e+00
+  store double %add4, ptr null, align 8
+  ret void
+}
+
+declare double @llvm.maximum.f64(double, double) #0
+
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }